From c2ca9bcedeb004f9d7f5d3e1aafc7b83ce6c1e3f Mon Sep 17 00:00:00 2001
From: Matt Topol
Date: Mon, 29 Jan 2024 15:39:10 -0500
Subject: [PATCH 01/74] GH-39837: [Go][Flight] Allow cloning existing cookies
in middleware (#39838)
### Rationale for this change
This is needed for https://github.com/apache/arrow-adbc/issues/1194 to facilitate better connection handling for flight clients in ADBC by copying the existing cookies over when creating a sub-client.
### What changes are included in this PR?
Creating a `Clone` method on the `CookieMiddleware` so that a user can create and hold a reference to a specific cookie middleware instance and then create new ones on the fly that copy over the existing cookies at that moment.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No
* Closes: #39837
Authored-by: Matt Topol
Signed-off-by: Matt Topol
---
go/arrow/flight/cookie_middleware.go | 24 +++++++++
go/arrow/flight/cookie_middleware_test.go | 60 +++++++++++++++++++++++
2 files changed, 84 insertions(+)
diff --git a/go/arrow/flight/cookie_middleware.go b/go/arrow/flight/cookie_middleware.go
index 27754a13b829a..39c86d8303434 100644
--- a/go/arrow/flight/cookie_middleware.go
+++ b/go/arrow/flight/cookie_middleware.go
@@ -23,6 +23,7 @@ import (
"sync"
"time"
+ "golang.org/x/exp/maps"
"google.golang.org/grpc/metadata"
)
@@ -40,11 +41,34 @@ func NewClientCookieMiddleware() ClientMiddleware {
return CreateClientMiddleware(&clientCookieMiddleware{jar: make(map[string]http.Cookie)})
}
+func NewCookieMiddleware() CookieMiddleware {
+ return &clientCookieMiddleware{jar: make(map[string]http.Cookie)}
+}
+
+// CookieMiddleware is a go-routine safe middleware for flight clients
+// which properly handles Set-Cookie headers for storing cookies.
+// This can be passed into `CreateClientMiddleware` to create a new
+// middleware object. You can also clone it to create middleware for a
+// new client which starts with the same cookies.
+type CookieMiddleware interface {
+ CustomClientMiddleware
+ // Clone creates a new CookieMiddleware that starts out with the same
+ // cookies that this one already has. This is useful when creating a
+ // new client connection for the same server.
+ Clone() CookieMiddleware
+}
+
type clientCookieMiddleware struct {
jar map[string]http.Cookie
mx sync.Mutex
}
+func (cc *clientCookieMiddleware) Clone() CookieMiddleware {
+ cc.mx.Lock()
+ defer cc.mx.Unlock()
+ return &clientCookieMiddleware{jar: maps.Clone(cc.jar)}
+}
+
func (cc *clientCookieMiddleware) StartCall(ctx context.Context) context.Context {
cc.mx.Lock()
defer cc.mx.Unlock()
diff --git a/go/arrow/flight/cookie_middleware_test.go b/go/arrow/flight/cookie_middleware_test.go
index 0adf4927652d4..4007d056b2c99 100644
--- a/go/arrow/flight/cookie_middleware_test.go
+++ b/go/arrow/flight/cookie_middleware_test.go
@@ -239,3 +239,63 @@ func TestCookieExpiration(t *testing.T) {
cookieMiddleware.expectedCookies = map[string]string{}
makeReq(client, t)
}
+
+func TestCookiesClone(t *testing.T) {
+ cookieMiddleware := &serverAddCookieMiddleware{}
+
+ s := flight.NewServerWithMiddleware([]flight.ServerMiddleware{
+ flight.CreateServerMiddleware(cookieMiddleware),
+ })
+ s.Init("localhost:0")
+ f := &flightServer{}
+ s.RegisterFlightService(f)
+
+ go s.Serve()
+ defer s.Shutdown()
+
+ makeReq := func(c flight.Client, t *testing.T) {
+ flightStream, err := c.ListFlights(context.Background(), &flight.Criteria{})
+ assert.NoError(t, err)
+
+ for {
+ _, err := flightStream.Recv()
+ if err != nil {
+ if errors.Is(err, io.EOF) {
+ break
+ }
+ assert.NoError(t, err)
+ }
+ }
+ }
+
+ credsOpt := grpc.WithTransportCredentials(insecure.NewCredentials())
+ cookies := flight.NewCookieMiddleware()
+ client1, err := flight.NewClientWithMiddleware(s.Addr().String(), nil,
+ []flight.ClientMiddleware{flight.CreateClientMiddleware(cookies)}, credsOpt)
+ require.NoError(t, err)
+ defer client1.Close()
+
+ // set cookies
+ cookieMiddleware.cookies = []*http.Cookie{
+ {Name: "foo", Value: "bar"},
+ {Name: "foo2", Value: "bar2", MaxAge: 1},
+ }
+ makeReq(client1, t)
+
+ // validate set
+ cookieMiddleware.expectedCookies = map[string]string{
+ "foo": "bar", "foo2": "bar2",
+ }
+ makeReq(client1, t)
+
+ client2, err := flight.NewClientWithMiddleware(s.Addr().String(), nil,
+ []flight.ClientMiddleware{flight.CreateClientMiddleware(cookies.Clone())}, credsOpt)
+ require.NoError(t, err)
+ defer client2.Close()
+
+ // validate clone worked
+ cookieMiddleware.expectedCookies = map[string]string{
+ "foo": "bar", "foo2": "bar2",
+ }
+ makeReq(client2, t)
+}
From fc3278ffb78e6f4f79cd20160bf911efa5a09ba1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 30 Jan 2024 06:01:22 +0900
Subject: [PATCH 02/74] MINOR: [Java] Bump org.immutables:value from 2.8.2 to
2.10.0 in /java (#39831)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bumps [org.immutables:value](https://github.com/immutables/immutables) from 2.8.2 to 2.10.0.
Release notes
Sourced from org.immutables:value's releases.
2.10.0
JakartaEE support
Style flag jakarta = true
Mainly package change for annotations and types like Validator
Miscellaneous
- JDK9 unmodifiable collections for
List
, Set
, Map
, style flag jdk9Collections = true
- Suppress from method, style flag
from = ""
- Non-strict modifiables allows reading unset attributes, style flag
strictModifiables = false
- Fixes in nested type_use annotations.
- Performance: better initial capacity for collections
- Refinements and fixes to Criteria modules
- Plus many other refinements and maintance, see below
Workarounds for Gradle
- imports for not-yet-generated types : add
options.sourcepath
- disable incremental compilation (
options.incremental
), may also help is some complex cases
What's Changed (since some last year's release)
New Contributors
Full Changelog: https://github.com/immutables/immutables/compare/2.9.3...2.10.0
2.9.3
Maintenance release
What's Changed
... (truncated)
Commits
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.immutables:value&package-manager=maven&previous-version=2.8.2&new-version=2.10.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/pom.xml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/java/pom.xml b/java/pom.xml
index 3951f1c1bc8ed..2423e2d495d11 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -438,7 +438,7 @@
org.immutables
value
- 2.8.2
+ 2.10.0
@@ -653,7 +653,7 @@
org.immutables
value
- 2.8.2
+ 2.10.0
provided
From 7fd59739fddf4b614c68d57e24068542b4cf2884 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 30 Jan 2024 06:01:42 +0900
Subject: [PATCH 03/74] MINOR: [Java] Bump
org.apache.maven.plugins:maven-gpg-plugin from 1.5 to 3.1.0 in /java (#39832)
Bumps [org.apache.maven.plugins:maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 1.5 to 3.1.0.
Commits
699e2ad
[maven-release-plugin] prepare release maven-gpg-plugin-3.1.0
f314f8e
[MGPG-97] use gpgverify plugin to check dependencies signatures
bad6b57
[MGPG-96] add INFO message
0498a82
[MGPG-95] don't GPG-sign .sigstore signatures
09b5be9
Auto-link MGPG Jira
1e0472f
extract FilesCollector
af9ccfd
[MGPG-94] Ignore reformatting
5e51734
[MGPG-94] Integration tests - convert and reformat bsh to groovy
955ea0e
[MGPG-94] Reformat code
e160f43
[MGPG-94] Bump maven-plugins from 36 to 39
- Additional commits viewable in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-gpg-plugin&package-manager=maven&previous-version=1.5&new-version=3.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/gandiva/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml
index d0290b6814ed5..6337efcf7e348 100644
--- a/java/gandiva/pom.xml
+++ b/java/gandiva/pom.xml
@@ -96,7 +96,7 @@
org.apache.maven.plugins
maven-gpg-plugin
- 1.5
+ 3.1.0
sign-artifacts
From 3b8b700348f5d73fa4cfdb2780b0bde5d83a7f22 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 30 Jan 2024 06:02:06 +0900
Subject: [PATCH 04/74] MINOR: [Java] Bump org.apache.hadoop:hadoop-common from
2.7.1 to 3.3.6 in /java (#39833)
Bumps org.apache.hadoop:hadoop-common from 2.7.1 to 3.3.6.
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.hadoop:hadoop-common&package-manager=maven&previous-version=2.7.1&new-version=3.3.6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/adapter/orc/pom.xml | 2 +-
java/pom.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml
index 265a9a71b80e2..79e51470a426e 100644
--- a/java/adapter/orc/pom.xml
+++ b/java/adapter/orc/pom.xml
@@ -75,7 +75,7 @@
org.apache.hadoop
hadoop-common
- 3.3.3
+ 3.3.6
test
diff --git a/java/pom.xml b/java/pom.xml
index 2423e2d495d11..3947f76cae849 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -37,7 +37,7 @@
1.60.0
3.23.1
2.16.0
- 2.7.1
+ 3.3.6
23.5.26
1.11.3
From 91d65b79f71a1be6a0bf7426e0ee91dd2e65a852 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 30 Jan 2024 06:02:31 +0900
Subject: [PATCH 05/74] MINOR: [Java] Bump io.netty:netty-bom from
4.1.105.Final to 4.1.106.Final in /java (#39834)
Bumps [io.netty:netty-bom](https://github.com/netty/netty) from 4.1.105.Final to 4.1.106.Final.
Commits
9d0ec7b
[maven-release-plugin] prepare release netty-4.1.106.Final
e2859f4
Short-circuit ByteBuf::release (#13782)
d9ca50d
Prevent sharing the index of the continuation frame header ByteBuf. (#13786)
0e7c27c
DnsNameResolver: Fail query if id space is exhausted (#13784)
b194741
[maven-release-plugin] prepare for next development iteration
- See full diff in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=io.netty:netty-bom&package-manager=maven&previous-version=4.1.105.Final&new-version=4.1.106.Final)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/java/pom.xml b/java/pom.xml
index 3947f76cae849..4888f833df096 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -33,7 +33,7 @@
5.10.1
2.0.11
33.0.0-jre
- 4.1.105.Final
+ 4.1.106.Final
1.60.0
3.23.1
2.16.0
From 63498c2891c757aca016305c61e4a0ba82faed2b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 30 Jan 2024 06:02:55 +0900
Subject: [PATCH 06/74] MINOR: [Java] Bump
org.apache.maven.plugins:maven-enforcer-plugin from 3.0.0-M2 to 3.4.1 in
/java (#39835)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bumps [org.apache.maven.plugins:maven-enforcer-plugin](https://github.com/apache/maven-enforcer) from 3.0.0-M2 to 3.4.1.
Release notes
Sourced from org.apache.maven.plugins:maven-enforcer-plugin's releases.
3.4.1
🐛 Bug Fixes
👻 Maintenance
3.4.0
🚀 New features and improvements
🐛 Bug Fixes
📦 Dependency updates
📝 Documentation updates
- Clarify availability of AbstractEnforcerRule (#278)
@kwin
👻 Maintenance
- Bump org.junit:junit-bom from 5.9.3 to 5.10.0 (#280)
@dependabot
- Bump snappy-java from 1.1.8.3 to 1.1.10.1 in /maven-enforcer-plugin/src/it/projects/dependency-convergence_transitive_provided/module1 (#273)
@dependabot
- [MNG-6829] - Replace StringUtils#isEmpty(String) and #isNotEmpty(String) (#272)
@timtebeek
3.3.0
... (truncated)
Commits
d8a21ee
[maven-release-plugin] prepare release enforcer-3.4.1
66250c0
[MENFORCER-491] Fix plugin documentation generation
5d32e6c
[MENFORCER-490] Declare maven-enforcer-plugin dependencies (#285)
d258109
[MENFORCER-490] Declare org.eclipse.sisu.plexus dependencies (#283)
2aa71e7
[MENFORCER-490] Declare maven-enforcer-extension dependencies (#284)
d4ec8e1
[MENFORCER-490] Declare maven-enforcer-extension dependencies (#282)
b35e4a0
[maven-release-plugin] prepare for next development iteration
3d365f7
[maven-release-plugin] prepare release enforcer-3.4.0
5feb93a
[MENFORCER-489] Bump commons-lang3 from 3.12.0 to 3.13.0
8f2de47
Bump org.junit:junit-bom from 5.9.3 to 5.10.0
- Additional commits viewable in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-enforcer-plugin&package-manager=maven&previous-version=3.0.0-M2&new-version=3.4.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/java/pom.xml b/java/pom.xml
index 4888f833df096..3e595648ed085 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -445,7 +445,7 @@
maven-enforcer-plugin
- 3.0.0-M2
+ 3.4.1
org.apache.maven.plugins
From b778ace6622614035acc1bbe17b06bdc8141d9fe Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Tue, 30 Jan 2024 11:54:57 +0900
Subject: [PATCH 07/74] GH-39841: [GLib] Add support for GLib 2.56 again
(#39842)
### Rationale for this change
It's still used in CentOS 7 and AlmaLinux 8.
### What changes are included in this PR?
Don't use `g_time_zone_get_identifier()` with GLib < 2.58.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* Closes: #39841
Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
c_glib/arrow-glib/basic-data-type.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp
index 0de9466eee456..98b2c92104507 100644
--- a/c_glib/arrow-glib/basic-data-type.cpp
+++ b/c_glib/arrow-glib/basic-data-type.cpp
@@ -1212,7 +1212,8 @@ garrow_timestamp_data_type_class_init(GArrowTimestampDataTypeClass *klass)
/**
* garrow_timestamp_data_type_new:
* @unit: The unit of the timestamp data.
- * @time_zone: (nullable): The time zone of the timestamp data.
+ * @time_zone: (nullable): The time zone of the timestamp data. If based GLib
+ * is less than 2.58, this is ignored.
*
* Returns: A newly created the number of
* seconds/milliseconds/microseconds/nanoseconds since UNIX epoch in
@@ -1226,9 +1227,11 @@ garrow_timestamp_data_type_new(GArrowTimeUnit unit,
{
auto arrow_unit = garrow_time_unit_to_raw(unit);
std::string arrow_timezone;
+#if GLIB_CHECK_VERSION(2, 58, 0)
if (time_zone) {
arrow_timezone = g_time_zone_get_identifier(time_zone);
}
+#endif
auto arrow_data_type = arrow::timestamp(arrow_unit, arrow_timezone);
auto data_type =
GARROW_TIMESTAMP_DATA_TYPE(g_object_new(GARROW_TYPE_TIMESTAMP_DATA_TYPE,
From c6ab28677ddf22799f3db277137708ac5b070acd Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 30 Jan 2024 09:16:53 +0100
Subject: [PATCH 08/74] GH-39640: [Docs] Pin pydata-sphinx-theme to 0.14.*
(#39758)
### Rationale for this change
Fixing the pinning syntax so we get the latest 0.14.x version (which is currently 0.14.4)
* Closes: #39640
Authored-by: Joris Van den Bossche
Signed-off-by: Joris Van den Bossche
---
ci/conda_env_sphinx.txt | 2 +-
docs/requirements.txt | 2 +-
docs/source/python/api/compute.rst | 2 +-
docs/source/python/compute.rst | 4 ++--
docs/source/python/pandas.rst | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt
index d0f494d2e085d..0e50875fc1ef8 100644
--- a/ci/conda_env_sphinx.txt
+++ b/ci/conda_env_sphinx.txt
@@ -20,7 +20,7 @@ breathe
doxygen
ipython
numpydoc
-pydata-sphinx-theme=0.14.1
+pydata-sphinx-theme=0.14
sphinx-autobuild
sphinx-design
sphinx-copybutton
diff --git a/docs/requirements.txt b/docs/requirements.txt
index aee2eb662c06b..5d6fec7ddf72e 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -5,7 +5,7 @@
breathe
ipython
numpydoc
-pydata-sphinx-theme==0.14.1
+pydata-sphinx-theme~=0.14
sphinx-autobuild
sphinx-design
sphinx-copybutton
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index b879643017a90..928c607d139ce 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -590,4 +590,4 @@ User-Defined Functions
:toctree: ../generated/
register_scalar_function
- ScalarUdfContext
+ UdfContext
diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst
index e8a5b613c6099..c02059a4f8faa 100644
--- a/docs/source/python/compute.rst
+++ b/docs/source/python/compute.rst
@@ -445,9 +445,9 @@ output type need to be defined. Using :func:`pyarrow.compute.register_scalar_fun
The implementation of a user-defined function always takes a first *context*
parameter (named ``ctx`` in the example above) which is an instance of
-:class:`pyarrow.compute.ScalarUdfContext`.
+:class:`pyarrow.compute.UdfContext`.
This context exposes several useful attributes, particularly a
-:attr:`~pyarrow.compute.ScalarUdfContext.memory_pool` to be used for
+:attr:`~pyarrow.compute.UdfContext.memory_pool` to be used for
allocations in the context of the user-defined function.
You can call a user-defined function directly using :func:`pyarrow.compute.call_function`:
diff --git a/docs/source/python/pandas.rst b/docs/source/python/pandas.rst
index fda90c4f2a58c..23a4b73bd0965 100644
--- a/docs/source/python/pandas.rst
+++ b/docs/source/python/pandas.rst
@@ -197,7 +197,7 @@ use the ``datetime64[ns]`` type in Pandas and are converted to an Arrow
.. ipython:: python
- df = pd.DataFrame({"datetime": pd.date_range("2020-01-01T00:00:00Z", freq="H", periods=3)})
+ df = pd.DataFrame({"datetime": pd.date_range("2020-01-01T00:00:00Z", freq="h", periods=3)})
df.dtypes
df
From 787afa1594586d2d556d21471647f9cd2c55b18f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 30 Jan 2024 12:54:19 +0100
Subject: [PATCH 09/74] GH-39651: [Python] Basic pyarrow bindings for
Binary/StringView classes (#39652)
### Rationale for this change
First step for https://github.com/apache/arrow/issues/39633: exposing the Array, DataType and Scalar classes for BinaryView and StringView, such that those can already be represented in pyarrow.
(I exposed a variant of StringBuilder as well, just for now to be able to create test data)
* Closes: #39651
Authored-by: Joris Van den Bossche
Signed-off-by: Joris Van den Bossche
---
docs/source/python/api/arrays.rst | 4 ++
docs/source/python/api/datatypes.rst | 4 ++
python/pyarrow/__init__.py | 7 ++-
python/pyarrow/array.pxi | 14 +++++
python/pyarrow/builder.pxi | 66 ++++++++++++++++++++++
python/pyarrow/includes/libarrow.pxd | 9 +++
python/pyarrow/lib.pxd | 8 +++
python/pyarrow/lib.pyx | 2 +
python/pyarrow/scalar.pxi | 10 ++++
python/pyarrow/src/arrow/python/helpers.cc | 2 +
python/pyarrow/tests/test_builder.py | 21 ++++++-
python/pyarrow/tests/test_misc.py | 4 ++
python/pyarrow/tests/test_scalars.py | 28 ++++++++-
python/pyarrow/tests/test_types.py | 8 +++
python/pyarrow/types.pxi | 32 +++++++++++
python/pyarrow/types.py | 10 ++++
16 files changed, 223 insertions(+), 6 deletions(-)
diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst
index 73b5e063ff1a0..b858862dcff01 100644
--- a/docs/source/python/api/arrays.rst
+++ b/docs/source/python/api/arrays.rst
@@ -63,6 +63,8 @@ may expose data type-specific methods or properties.
FixedSizeBinaryArray
LargeBinaryArray
LargeStringArray
+ BinaryViewArray,
+ StringViewArray,
Time32Array
Time64Array
Date32Array
@@ -119,6 +121,8 @@ classes may expose data type-specific methods or properties.
FixedSizeBinaryScalar
LargeBinaryScalar
LargeStringScalar
+ BinaryViewScalar
+ StringViewScalar
Time32Scalar
Time64Scalar
Date32Scalar
diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst
index 4066ef314234d..642c243b21af0 100644
--- a/docs/source/python/api/datatypes.rst
+++ b/docs/source/python/api/datatypes.rst
@@ -55,6 +55,8 @@ These should be used to create Arrow data types and schemas.
large_binary
large_string
large_utf8
+ binary_view
+ string_view
decimal128
list_
large_list
@@ -168,6 +170,8 @@ represents a given data type (such as ``int32``) or general category
is_large_binary
is_large_unicode
is_large_string
+ is_binary_view
+ is_string_view
is_fixed_size_binary
is_map
is_dictionary
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 9da94885ec6b2..4dbd1258d3cea 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -163,7 +163,7 @@ def print_entry(label, value):
time32, time64, timestamp, date32, date64, duration,
month_day_nano_interval,
float16, float32, float64,
- binary, string, utf8,
+ binary, string, utf8, binary_view, string_view,
large_binary, large_string, large_utf8,
decimal128, decimal256,
list_, large_list, map_, struct,
@@ -205,6 +205,7 @@ def print_entry(label, value):
FixedSizeListArray, UnionArray,
BinaryArray, StringArray,
LargeBinaryArray, LargeStringArray,
+ BinaryViewArray, StringViewArray,
FixedSizeBinaryArray,
DictionaryArray,
Date32Array, Date64Array, TimestampArray,
@@ -223,8 +224,8 @@ def print_entry(label, value):
Time32Scalar, Time64Scalar,
TimestampScalar, DurationScalar,
MonthDayNanoIntervalScalar,
- BinaryScalar, LargeBinaryScalar,
- StringScalar, LargeStringScalar,
+ BinaryScalar, LargeBinaryScalar, BinaryViewScalar,
+ StringScalar, LargeStringScalar, StringViewScalar,
FixedSizeBinaryScalar, DictionaryScalar,
MapScalar, StructScalar, UnionScalar,
RunEndEncodedScalar, ExtensionScalar)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 1416f5f4346d9..1029f3a629817 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -2942,6 +2942,12 @@ cdef class LargeStringArray(Array):
null_count, offset)
+cdef class StringViewArray(Array):
+ """
+ Concrete class for Arrow arrays of string (or utf8) view data type.
+ """
+
+
cdef class BinaryArray(Array):
"""
Concrete class for Arrow arrays of variable-sized binary data type.
@@ -2968,6 +2974,12 @@ cdef class LargeBinaryArray(Array):
return ( self.ap).total_values_length()
+cdef class BinaryViewArray(Array):
+ """
+ Concrete class for Arrow arrays of variable-sized binary view data type.
+ """
+
+
cdef class DictionaryArray(Array):
"""
Concrete class for dictionary-encoded Arrow arrays.
@@ -3669,6 +3681,8 @@ cdef dict _array_classes = {
_Type_STRING: StringArray,
_Type_LARGE_BINARY: LargeBinaryArray,
_Type_LARGE_STRING: LargeStringArray,
+ _Type_BINARY_VIEW: BinaryViewArray,
+ _Type_STRING_VIEW: StringViewArray,
_Type_DICTIONARY: DictionaryArray,
_Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray,
_Type_DECIMAL128: Decimal128Array,
diff --git a/python/pyarrow/builder.pxi b/python/pyarrow/builder.pxi
index a34ea5412e14a..2af39e2c589e6 100644
--- a/python/pyarrow/builder.pxi
+++ b/python/pyarrow/builder.pxi
@@ -80,3 +80,69 @@ cdef class StringBuilder(_Weakrefable):
def __len__(self):
return self.builder.get().length()
+
+
+cdef class StringViewBuilder(_Weakrefable):
+ """
+ Builder class for UTF8 string views.
+
+ This class exposes facilities for incrementally adding string values and
+ building the null bitmap for a pyarrow.Array (type='string_view').
+ """
+ cdef:
+ unique_ptr[CStringViewBuilder] builder
+
+ def __cinit__(self, MemoryPool memory_pool=None):
+ cdef CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
+ self.builder.reset(new CStringViewBuilder(pool))
+
+ def append(self, value):
+ """
+ Append a single value to the builder.
+
+ The value can either be a string/bytes object or a null value
+ (np.nan or None).
+
+ Parameters
+ ----------
+ value : string/bytes or np.nan/None
+ The value to append to the string array builder.
+ """
+ if value is None or value is np.nan:
+ self.builder.get().AppendNull()
+ elif isinstance(value, (bytes, str)):
+ self.builder.get().Append(tobytes(value))
+ else:
+ raise TypeError('StringViewBuilder only accepts string objects')
+
+ def append_values(self, values):
+ """
+ Append all the values from an iterable.
+
+ Parameters
+ ----------
+ values : iterable of string/bytes or np.nan/None values
+ The values to append to the string array builder.
+ """
+ for value in values:
+ self.append(value)
+
+ def finish(self):
+ """
+ Return result of builder as an Array object; also resets the builder.
+
+ Returns
+ -------
+ array : pyarrow.Array
+ """
+ cdef shared_ptr[CArray] out
+ with nogil:
+ self.builder.get().Finish(&out)
+ return pyarrow_wrap_array(out)
+
+ @property
+ def null_count(self):
+ return self.builder.get().null_count()
+
+ def __len__(self):
+ return self.builder.get().length()
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 74e92594b04e5..d92f09da779b6 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -126,6 +126,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
_Type_LARGE_BINARY" arrow::Type::LARGE_BINARY"
_Type_LARGE_STRING" arrow::Type::LARGE_STRING"
_Type_FIXED_SIZE_BINARY" arrow::Type::FIXED_SIZE_BINARY"
+ _Type_BINARY_VIEW" arrow::Type::BINARY_VIEW"
+ _Type_STRING_VIEW" arrow::Type::STRING_VIEW"
_Type_LIST" arrow::Type::LIST"
_Type_LARGE_LIST" arrow::Type::LARGE_LIST"
@@ -1295,7 +1297,14 @@ cdef extern from "arrow/builder.h" namespace "arrow" nogil:
cdef cppclass CStringBuilder" arrow::StringBuilder"(CBinaryBuilder):
CStringBuilder(CMemoryPool* pool)
+ CStatus Append(const c_string& value)
+
+ cdef cppclass CBinaryViewBuilder" arrow::BinaryViewBuilder"(CArrayBuilder):
+ CBinaryViewBuilder(shared_ptr[CDataType], CMemoryPool* pool)
+ CStatus Append(const char* value, int32_t length)
+ cdef cppclass CStringViewBuilder" arrow::StringViewBuilder"(CBinaryViewBuilder):
+ CStringViewBuilder(CMemoryPool* pool)
CStatus Append(const c_string& value)
cdef cppclass CTimestampBuilder "arrow::TimestampBuilder"(CArrayBuilder):
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 58ec34addbc0a..c1104864066e9 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -445,6 +445,14 @@ cdef class BinaryArray(Array):
pass
+cdef class StringViewArray(Array):
+ pass
+
+
+cdef class BinaryViewArray(Array):
+ pass
+
+
cdef class DictionaryArray(Array):
cdef:
object _indices, _dictionary
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 29a0bed55949c..b0368b67f790e 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -106,6 +106,8 @@ Type_STRING = _Type_STRING
Type_LARGE_BINARY = _Type_LARGE_BINARY
Type_LARGE_STRING = _Type_LARGE_STRING
Type_FIXED_SIZE_BINARY = _Type_FIXED_SIZE_BINARY
+Type_BINARY_VIEW = _Type_BINARY_VIEW
+Type_STRING_VIEW = _Type_STRING_VIEW
Type_LIST = _Type_LIST
Type_LARGE_LIST = _Type_LARGE_LIST
Type_MAP = _Type_MAP
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 9a66dc81226d4..2772acf81861c 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -665,6 +665,14 @@ cdef class LargeStringScalar(StringScalar):
pass
+cdef class BinaryViewScalar(BinaryScalar):
+ pass
+
+
+cdef class StringViewScalar(StringScalar):
+ pass
+
+
cdef class ListScalar(Scalar):
"""
Concrete class for list-like scalars.
@@ -1051,8 +1059,10 @@ cdef dict _scalar_classes = {
_Type_BINARY: BinaryScalar,
_Type_LARGE_BINARY: LargeBinaryScalar,
_Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
+ _Type_BINARY_VIEW: BinaryViewScalar,
_Type_STRING: StringScalar,
_Type_LARGE_STRING: LargeStringScalar,
+ _Type_STRING_VIEW: StringViewScalar,
_Type_LIST: ListScalar,
_Type_LARGE_LIST: LargeListScalar,
_Type_FIXED_SIZE_LIST: FixedSizeListScalar,
diff --git a/python/pyarrow/src/arrow/python/helpers.cc b/python/pyarrow/src/arrow/python/helpers.cc
index c266abc169d49..2c86c86a919be 100644
--- a/python/pyarrow/src/arrow/python/helpers.cc
+++ b/python/pyarrow/src/arrow/python/helpers.cc
@@ -63,6 +63,8 @@ std::shared_ptr GetPrimitiveType(Type::type type) {
GET_PRIMITIVE_TYPE(STRING, utf8);
GET_PRIMITIVE_TYPE(LARGE_BINARY, large_binary);
GET_PRIMITIVE_TYPE(LARGE_STRING, large_utf8);
+ GET_PRIMITIVE_TYPE(BINARY_VIEW, binary_view);
+ GET_PRIMITIVE_TYPE(STRING_VIEW, utf8_view);
GET_PRIMITIVE_TYPE(INTERVAL_MONTH_DAY_NANO, month_day_nano_interval);
default:
return nullptr;
diff --git a/python/pyarrow/tests/test_builder.py b/python/pyarrow/tests/test_builder.py
index 50d801026b7d8..abc8a0013df37 100644
--- a/python/pyarrow/tests/test_builder.py
+++ b/python/pyarrow/tests/test_builder.py
@@ -20,7 +20,7 @@
import numpy as np
import pyarrow as pa
-from pyarrow.lib import StringBuilder
+from pyarrow.lib import StringBuilder, StringViewBuilder
def test_weakref():
@@ -65,3 +65,22 @@ def test_string_builder_append_after_finish():
sbuilder.append("No effect")
expected = [None, None, "text", None, "other text"]
assert arr.to_pylist() == expected
+
+
+def test_string_view_builder():
+ builder = StringViewBuilder()
+ builder.append(b"a byte string")
+ builder.append("a string")
+ builder.append("a longer not-inlined string")
+ builder.append(np.nan)
+ builder.append_values([None, "text"])
+ assert len(builder) == 6
+ assert builder.null_count == 2
+ arr = builder.finish()
+ assert isinstance(arr, pa.Array)
+ assert arr.null_count == 2
+ assert arr.type == 'string_view'
+ expected = [
+ "a byte string", "a string", "a longer not-inlined string", None, None, "text"
+ ]
+ assert arr.to_pylist() == expected
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 8b8c50882b749..8cec8783280dd 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -185,6 +185,8 @@ def test_set_timezone_db_path_non_windows():
pa.UnionArray,
pa.BinaryArray,
pa.StringArray,
+ pa.BinaryViewArray,
+ pa.StringViewArray,
pa.FixedSizeBinaryArray,
pa.DictionaryArray,
pa.Date32Array,
@@ -221,6 +223,8 @@ def test_set_timezone_db_path_non_windows():
pa.StringScalar,
pa.BinaryScalar,
pa.FixedSizeBinaryScalar,
+ pa.BinaryViewScalar,
+ pa.StringViewScalar,
pa.ListScalar,
pa.LargeListScalar,
pa.MapScalar,
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 74dee59558239..4a239b23d5676 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -51,6 +51,9 @@
(b"bytes", None, pa.BinaryScalar),
("largestring", pa.large_string(), pa.LargeStringScalar),
(b"largebytes", pa.large_binary(), pa.LargeBinaryScalar),
+ # TODO(GH-39633) pa.scalar(..) requires python->arrow conversion to be implemented
+ # ("string_view", pa.string_view(), pa.StringViewScalar),
+ # (b"bytes_view", pa.binary_view(), pa.BinaryViewScalar),
(b"abc", pa.binary(3), pa.FixedSizeBinaryScalar),
([1, 2, 3], None, pa.ListScalar),
([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar),
@@ -488,7 +491,8 @@ def test_month_day_nano_interval():
@pytest.mark.parametrize('value', ['foo', 'mañana'])
@pytest.mark.parametrize(('ty', 'scalar_typ'), [
(pa.string(), pa.StringScalar),
- (pa.large_string(), pa.LargeStringScalar)
+ (pa.large_string(), pa.LargeStringScalar),
+ # (pa.string_view(), pa.StringViewScalar),
])
def test_string(value, ty, scalar_typ):
s = pa.scalar(value, type=ty)
@@ -503,10 +507,30 @@ def test_string(value, ty, scalar_typ):
assert buf.to_pybytes() == value.encode()
+@pytest.mark.parametrize('value', ['foo', 'mañana'])
+def test_string_view(value):
+ # TODO: replace with normal scalar construction
+ builder = pa.lib.StringViewBuilder()
+ builder.append(value)
+ arr = builder.finish()
+
+ s = arr[0]
+ assert isinstance(s, pa.StringViewScalar)
+ assert s.as_py() == value
+ assert s.as_py() != 'something'
+ assert repr(value) in repr(s)
+ assert str(s) == str(value)
+
+ buf = s.as_buffer()
+ assert isinstance(buf, pa.Buffer)
+ assert buf.to_pybytes() == value.encode()
+
+
@pytest.mark.parametrize('value', [b'foo', b'bar'])
@pytest.mark.parametrize(('ty', 'scalar_typ'), [
(pa.binary(), pa.BinaryScalar),
- (pa.large_binary(), pa.LargeBinaryScalar)
+ (pa.large_binary(), pa.LargeBinaryScalar),
+ # (pa.binary_view(), pa.BinaryViewScalar),
])
def test_binary(value, ty, scalar_typ):
s = pa.scalar(value, type=ty)
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index c8a52c6b626c2..a5ab3128dc874 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -61,6 +61,8 @@ def get_many_types():
pa.binary(10),
pa.large_string(),
pa.large_binary(),
+ pa.string_view(),
+ pa.binary_view(),
pa.list_(pa.int32()),
pa.list_(pa.int32(), 2),
pa.large_list(pa.uint16()),
@@ -244,6 +246,12 @@ def test_is_binary_string():
assert types.is_fixed_size_binary(pa.binary(5))
assert not types.is_fixed_size_binary(pa.binary())
+ assert types.is_string_view(pa.string_view())
+ assert not types.is_string_view(pa.string())
+ assert types.is_binary_view(pa.binary_view())
+ assert not types.is_binary_view(pa.binary())
+ assert not types.is_binary_view(pa.string_view())
+
def test_is_temporal_date_time_timestamp():
date_types = [pa.date32(), pa.date64()]
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index b6dc53d633543..ce3736b5af847 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -4375,6 +4375,36 @@ def large_utf8():
return large_string()
+def binary_view():
+ """
+ Create a variable-length binary view type.
+
+ Examples
+ --------
+ Create an instance of a string type:
+
+ >>> import pyarrow as pa
+ >>> pa.binary_view()
+ DataType(binary_view)
+ """
+ return primitive_type(_Type_BINARY_VIEW)
+
+
+def string_view():
+ """
+ Create UTF8 variable-length string view type.
+
+ Examples
+ --------
+ Create an instance of a string type:
+
+ >>> import pyarrow as pa
+ >>> pa.string_view()
+ DataType(string_view)
+ """
+ return primitive_type(_Type_STRING_VIEW)
+
+
def list_(value_type, int list_size=-1):
"""
Create ListType instance from child data type or field.
@@ -4991,6 +5021,8 @@ cdef dict _type_aliases = {
'large_str': large_string,
'large_utf8': large_string,
'large_binary': large_binary,
+ 'binary_view': binary_view,
+ 'string_view': string_view,
'date32': date32,
'date64': date64,
'date32[day]': date32,
diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py
index 5d7dbe4b451b9..32398dac9c5f5 100644
--- a/python/pyarrow/types.py
+++ b/python/pyarrow/types.py
@@ -243,6 +243,16 @@ def is_fixed_size_binary(t):
return t.id == lib.Type_FIXED_SIZE_BINARY
+@doc(is_null, datatype="variable-length binary view")
+def is_binary_view(t):
+ return t.id == lib.Type_BINARY_VIEW
+
+
+@doc(is_null, datatype="variable-length string (utf-8) view")
+def is_string_view(t):
+ return t.id == lib.Type_STRING_VIEW
+
+
@doc(is_null, datatype="date")
def is_date(t):
return t.id in _DATE_TYPES
From 749f936fc77b83d3c0ec5642c16561b3afa5dfa7 Mon Sep 17 00:00:00 2001
From: Weston Pace
Date: Wed, 31 Jan 2024 08:24:11 -0800
Subject: [PATCH 10/74] MINOR: [CI] update weston codeowners (#39867)
### Rationale for this change
Currently I am unable to keep up with my Github inbox and thus respond to very little. I am trying to balance this.
### What changes are included in this PR?
Reduce the scope of files that will trigger automated review.
### Are these changes tested?
N/A
### Are there any user-facing changes?
No
Authored-by: Weston Pace
Signed-off-by: Weston Pace
---
.github/CODEOWNERS | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 41a075b1c0bcb..e7e544c2b0e62 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -30,15 +30,10 @@
# /cpp/
/cpp/src/arrow/acero @westonpace
/cpp/src/arrow/adapters/orc @wgtmac
-/cpp/src/arrow/dataset @westonpace
/cpp/src/arrow/engine @westonpace
/cpp/src/arrow/flight/ @lidavidm
-/cpp/src/arrow/util/async* @westonpace
-/cpp/src/arrow/util/future* @westonpace
-/cpp/src/arrow/util/thread* @westonpace
/cpp/src/parquet @wgtmac
-/cpp/src/skyhook @westonpace
-/csharp/ @westonpace
+/csharp/ @curthagenlocher
/go/ @zeroshade
/java/ @lidavidm
/js/ @domoritz @trxcllnt
From 2a87693134135a8af2ae2b6df41980176431b1c0 Mon Sep 17 00:00:00 2001
From: david dali susanibar arce
Date: Wed, 31 Jan 2024 13:38:54 -0500
Subject: [PATCH 11/74] GH-39680: [Java] enable half float support on Java
module (#39681)
### Rationale for this change
- To enable half float support in the Java module.
### What changes are included in this PR?
- [x] Add initial Float16 type support
- [x] Unit test
- [x] Integration test
- [x] Documentation
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No
* Closes: #39680
Authored-by: david dali susanibar arce
Signed-off-by: David Li
---
docs/source/status.rst | 9 +-
.../apache/arrow/dataset/TestAllTypes.java | 6 +-
.../org/apache/arrow/memory/util/Float16.java | 271 +++++++++++
.../org/apache/arrow/memory/TestArrowBuf.java | 11 +
.../main/codegen/data/ValueVectorTypes.tdd | 10 +
.../main/codegen/templates/UnionReader.java | 6 +-
.../org/apache/arrow/vector/Float2Vector.java | 434 ++++++++++++++++++
.../org/apache/arrow/vector/types/Types.java | 16 +-
.../apache/arrow/vector/TestValueVector.java | 198 ++++++++
9 files changed, 953 insertions(+), 8 deletions(-)
create mode 100644 java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java
create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 03a87012342c2..11dd9c2c2965c 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -40,7 +40,7 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| Float16 | ✓ (1) | | ✓ | ✓ | ✓ (2)| ✓ | ✓ | |
+| Float16 | ✓ (1) | ✓ (2) | ✓ | ✓ | ✓ (3)| ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
@@ -104,7 +104,7 @@ Data Types
| Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | Swift |
| (special) | | | | | | | | |
+===================+=======+=======+=======+============+=======+=======+=======+=======+
-| Dictionary | ✓ | ✓ (3) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | |
+| Dictionary | ✓ | ✓ (4) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
@@ -114,8 +114,9 @@ Data Types
Notes:
* \(1) Casting to/from Float16 in C++ is not supported.
-* \(2) Float16 support in C# is only available when targeting .NET 6+.
-* \(3) Nested dictionaries not supported
+* \(2) Casting to/from Float16 in Java is not supported.
+* \(3) Float16 support in C# is only available when targeting .NET 6+.
+* \(4) Nested dictionaries not supported
.. seealso::
The :ref:`format_columnar` specification.
diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java
index 13b247452348d..6d33cf057ed3a 100644
--- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java
+++ b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java
@@ -32,6 +32,7 @@
import org.apache.arrow.dataset.file.DatasetFileWriter;
import org.apache.arrow.dataset.file.FileFormat;
import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.Float16;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateMilliVector;
@@ -39,6 +40,7 @@
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.DurationVector;
import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float2Vector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
@@ -89,7 +91,6 @@ public class TestAllTypes extends TestDataset {
private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) {
// Notes:
- // - Float16 is not supported by Java.
// - IntervalMonthDayNano is not supported by Parquet.
// - Map (GH-38250) and SparseUnion are resulting in serialization errors when writing with the Dataset API.
// "Unhandled type for Arrow to Parquet schema conversion" errors: IntervalDay, IntervalYear, DenseUnion
@@ -109,6 +110,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) {
Field.nullablePrimitive("uint16", new ArrowType.Int(16, false)),
Field.nullablePrimitive("uint32", new ArrowType.Int(32, false)),
Field.nullablePrimitive("uint64", new ArrowType.Int(64, false)),
+ Field.nullablePrimitive("float16", new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)),
Field.nullablePrimitive("float32", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
Field.nullablePrimitive("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
Field.nullablePrimitive("utf8", ArrowType.Utf8.INSTANCE),
@@ -148,6 +150,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) {
root.getVector("uint16").setNull(0);
root.getVector("uint32").setNull(0);
root.getVector("uint64").setNull(0);
+ root.getVector("float16").setNull(0);
root.getVector("float32").setNull(0);
root.getVector("float64").setNull(0);
root.getVector("utf8").setNull(0);
@@ -180,6 +183,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) {
((UInt2Vector) root.getVector("uint16")).set(1, 1);
((UInt4Vector) root.getVector("uint32")).set(1, 1);
((UInt8Vector) root.getVector("uint64")).set(1, 1);
+ ((Float2Vector) root.getVector("float16")).set(1, Float16.toFloat16(+32.875f));
((Float4Vector) root.getVector("float32")).set(1, 1.0f);
((Float8Vector) root.getVector("float64")).set(1, 1.0);
((VarCharVector) root.getVector("utf8")).set(1, new Text("a"));
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java
new file mode 100644
index 0000000000000..8040158fd090e
--- /dev/null
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+
+import org.apache.arrow.util.VisibleForTesting;
+
+/**
+ * Lifted from Apache Parquet MR project:
+ * https://github.com/apache/parquet-mr/blob/e87b80308869b77f914fcfd04364686e11158950/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java
+ *
+ * Changes made:
+ * - Modify the data type input from Parquet-MR Binary (toFloat(Binary b)) to Arrow Java short (toFloat(short b))
+ * - Expose NAN and POSITIVE_INFINITY variables
+ *
+ *
+ *
+ * The class is a utility class to manipulate half-precision 16-bit
+ * IEEE 754
+ * floating point data types (also called fp16 or binary16). A half-precision float can be
+ * created from or converted to single-precision floats, and is stored in a short data type.
+ * The IEEE 754 standard specifies an float16 as having the following format:
+ *
+ * - Sign bit: 1 bit
+ * - Exponent width: 5 bits
+ * - Significand: 10 bits
+ *
+ *
+ * The format is laid out as follows:
+ *
+ * 1 11111 1111111111
+ * ^ --^-- -----^----
+ * sign | |_______ significand
+ * |
+ * -- exponent
+ *
+ * Half-precision floating points can be useful to save memory and/or
+ * bandwidth at the expense of range and precision when compared to single-precision
+ * floating points (float32).
+ * Ref: https://android.googlesource.com/platform/libcore/+/master/luni/src/main/java/libcore/util/FP16.java
+ */
+public class Float16 {
+ // Positive infinity of type half-precision float.
+ public static final short POSITIVE_INFINITY = (short) 0x7c00;
+ // A Not-a-Number representation of a half-precision float.
+ public static final short NaN = (short) 0x7e00;
+ // The bitmask to and a number with to obtain the sign bit.
+ private static final int SIGN_MASK = 0x8000;
+ // The offset to shift by to obtain the exponent bits.
+ private static final int EXPONENT_SHIFT = 10;
+ // The bitmask to and a number shifted by EXPONENT_SHIFT right, to obtain exponent bits.
+ private static final int SHIFTED_EXPONENT_MASK = 0x1f;
+ // The bitmask to and a number with to obtain significand bits.
+ private static final int SIGNIFICAND_MASK = 0x3ff;
+ // The offset of the exponent from the actual value.
+ private static final int EXPONENT_BIAS = 15;
+ // The offset to shift by to obtain the sign bit.
+ private static final int SIGN_SHIFT = 15;
+ // The bitmask to AND with to obtain exponent and significand bits.
+ private static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff;
+
+ private static final int FP32_SIGN_SHIFT = 31;
+ private static final int FP32_EXPONENT_SHIFT = 23;
+ private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff;
+ private static final int FP32_SIGNIFICAND_MASK = 0x7fffff;
+ private static final int FP32_EXPONENT_BIAS = 127;
+ private static final int FP32_QNAN_MASK = 0x400000;
+ private static final int FP32_DENORMAL_MAGIC = 126 << 23;
+ private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC);
+
+ /**
+ * Returns true if the specified half-precision float value represents
+ * a Not-a-Number, false otherwise.
+ *
+ * @param h A half-precision float value
+ * @return True if the value is a NaN, false otherwise
+ *
+ */
+ @VisibleForTesting
+ public static boolean isNaN(short h) {
+ return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY;
+ }
+
+ /**
+ * Compares the two specified half-precision float values. The following
+ * conditions apply during the comparison:
+ *
+ *
+ * - NaN is considered by this method to be equal to itself and greater
+ * than all other half-precision float values (including {@code #POSITIVE_INFINITY})
+ * - POSITIVE_ZERO is considered by this method to be greater than NEGATIVE_ZERO.
+ *
+ *
+ * @param x The first half-precision float value to compare.
+ * @param y The second half-precision float value to compare
+ *
+ * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a
+ * value less than {@code 0} if {@code x} is numerically less than {@code y},
+ * and a value greater than {@code 0} if {@code x} is numerically greater
+ * than {@code y}
+ *
+ */
+ @VisibleForTesting
+ public static int compare(short x, short y) {
+ boolean xIsNaN = isNaN(x);
+ boolean yIsNaN = isNaN(y);
+
+ if (!xIsNaN && !yIsNaN) {
+ int first = ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff);
+ int second = ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
+ // Returns true if the first half-precision float value is less
+ // (smaller toward negative infinity) than the second half-precision float value.
+ if (first < second) {
+ return -1;
+ }
+
+ // Returns true if the first half-precision float value is greater
+ // (larger toward positive infinity) than the second half-precision float value.
+ if (first > second) {
+ return 1;
+ }
+ }
+
+ // Collapse NaNs, akin to halfToIntBits(), but we want to keep
+ // (signed) short value types to preserve the ordering of -0.0
+ // and +0.0
+ short xBits = xIsNaN ? NaN : x;
+ short yBits = yIsNaN ? NaN : y;
+ return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
+ }
+
+ /**
+ * Converts the specified half-precision float value into a
+ * single-precision float value. The following special cases are handled:
+ * If the input is NaN, the returned value is Float NaN.
+ * If the input is POSITIVE_INFINITY or NEGATIVE_INFINITY, the returned value is respectively
+ * Float POSITIVE_INFINITY or Float NEGATIVE_INFINITY.
+ * If the input is 0 (positive or negative), the returned value is +/-0.0f.
+ * Otherwise, the returned value is a normalized single-precision float value.
+ *
+ * @param b The half-precision float value to convert to single-precision
+ * @return A normalized single-precision float value
+ */
+ @VisibleForTesting
+ public static float toFloat(short b) {
+ int bits = b & 0xffff;
+ int s = bits & SIGN_MASK;
+ int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK;
+ int m = (bits) & SIGNIFICAND_MASK;
+ int outE = 0;
+ int outM = 0;
+ if (e == 0) { // Denormal or 0
+ if (m != 0) {
+ // Convert denorm fp16 into normalized fp32
+ float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
+ o -= FP32_DENORMAL_FLOAT;
+ return s == 0 ? o : -o;
+ }
+ } else {
+ outM = m << 13;
+ if (e == 0x1f) { // Infinite or NaN
+ outE = 0xff;
+ if (outM != 0) { // SNaNs are quieted
+ outM |= FP32_QNAN_MASK;
+ }
+ } else {
+ outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS;
+ }
+ }
+ int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
+ return Float.intBitsToFloat(out);
+ }
+
+ /**
+ * Converts the specified single-precision float value into a
+ * half-precision float value. The following special cases are handled:
+ *
+ * If the input is NaN, the returned value is NaN.
+ * If the input is Float POSITIVE_INFINITY or Float NEGATIVE_INFINITY,
+ * the returned value is respectively POSITIVE_INFINITY or NEGATIVE_INFINITY.
+ * If the input is 0 (positive or negative), the returned value is
+ * POSITIVE_ZERO or NEGATIVE_ZERO.
+ * If the input is a less than MIN_VALUE, the returned value
+ * is flushed to POSITIVE_ZERO or NEGATIVE_ZERO.
+ * If the input is a less than MIN_NORMAL, the returned value
+ * is a denorm half-precision float.
+ * Otherwise, the returned value is rounded to the nearest
+ * representable half-precision float value.
+ *
+ * @param f The single-precision float value to convert to half-precision
+ * @return A half-precision float value
+ */
+ public static short toFloat16(float f) {
+ int bits = Float.floatToRawIntBits(f);
+ int s = (bits >>> FP32_SIGN_SHIFT);
+ int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK;
+ int m = (bits) & FP32_SIGNIFICAND_MASK;
+ int outE = 0;
+ int outM = 0;
+ if (e == 0xff) { // Infinite or NaN
+ outE = 0x1f;
+ outM = m != 0 ? 0x200 : 0;
+ } else {
+ e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS;
+ if (e >= 0x1f) { // Overflow
+ outE = 0x1f;
+ } else if (e <= 0) { // Underflow
+ if (e < -10) {
+ // The absolute fp32 value is less than MIN_VALUE, flush to +/-0
+ } else {
+ // The fp32 value is a normalized float less than MIN_NORMAL,
+ // we convert to a denorm fp16
+ m = m | 0x800000;
+ int shift = 14 - e;
+ outM = m >> shift;
+ int lowm = m & ((1 << shift) - 1);
+ int hway = 1 << (shift - 1);
+ // if above halfway or exactly halfway and outM is odd
+ if (lowm + (outM & 1) > hway) {
+ // Round to nearest even
+ // Can overflow into exponent bit, which surprisingly is OK.
+ // This increment relies on the +outM in the return statement below
+ outM++;
+ }
+ }
+ } else {
+ outE = e;
+ outM = m >> 13;
+ // if above halfway or exactly halfway and outM is odd
+ if ((m & 0x1fff) + (outM & 0x1) > 0x1000) {
+ // Round to nearest even
+ // Can overflow into exponent bit, which surprisingly is OK.
+ // This increment relies on the +outM in the return statement below
+ outM++;
+ }
+ }
+ }
+ // The outM is added here as the +1 increments for outM above can
+ // cause an overflow in the exponent bit which is OK.
+ return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM);
+ }
+
+ /**
+ * Returns a string representation of the specified half-precision
+ * float value. Calling this method is equivalent to calling
+ * Float.toString(toFloat(h))
. See {@link Float#toString(float)}
+ * for more information on the format of the string representation.
+ *
+ * @param h A half-precision float value in binary little-endian format
+ * @return A string representation of the specified value
+ */
+ @VisibleForTesting
+ public static String toFloatString(short h) {
+ return Float.toString(Float16.toFloat(h));
+ }
+}
diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
index 9ba42abc1ce89..b4385b72a38cf 100644
--- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
+++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
@@ -29,6 +29,7 @@
import java.nio.ByteOrder;
import java.util.Arrays;
+import org.apache.arrow.memory.util.Float16;
import org.junit.Test;
import org.slf4j.LoggerFactory;
@@ -180,4 +181,14 @@ public void testEnabledHistoricalLog() {
((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(null);
}
}
+
+ @Test
+ public void testArrowBufFloat16() {
+ try (BufferAllocator allocator = new RootAllocator();
+ ArrowBuf buf = allocator.buffer(1024)
+ ) {
+ buf.setShort(0, Float16.toFloat16(+32.875f));
+ assertEquals((short) 0x501c, buf.getShort(0));
+ }
+ }
}
diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
index 2a921804202f0..6c2a967712454 100644
--- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
+++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
@@ -49,6 +49,16 @@
{ class: "SmallInt", valueHolder: "Int2Holder"},
]
},
+ {
+ major: "Fixed",
+ width: 2,
+ javaType: "short",
+ boxedType: "Short",
+ fields: [{name: "value", type: "short"}],
+ minor: [
+ { class: "Float2", valueHolder: "Int2Holder"},
+ ]
+ },
{
major: "Fixed",
width: 4,
diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java
index 56a6cc90b321b..822d4822987fb 100644
--- a/java/vector/src/main/codegen/templates/UnionReader.java
+++ b/java/vector/src/main/codegen/templates/UnionReader.java
@@ -39,7 +39,9 @@
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {
- private BaseReader[] readers = new BaseReader[45];
+ private static final int NUM_SUPPORTED_TYPES = 46;
+
+ private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES];
public UnionVector data;
public UnionReader(UnionVector data) {
@@ -50,7 +52,7 @@ public MinorType getMinorType() {
return TYPES[data.getTypeValue(idx())];
}
- private static MinorType[] TYPES = new MinorType[45];
+ private static MinorType[] TYPES = new MinorType[NUM_SUPPORTED_TYPES];
static {
for (MinorType minorType : MinorType.values()) {
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java
new file mode 100644
index 0000000000000..9d3f25769abff
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java
@@ -0,0 +1,434 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.Float16;
+import org.apache.arrow.vector.complex.impl.Float2ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Float2Holder;
+import org.apache.arrow.vector.holders.NullableFloat2Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Float2Vector implements a fixed width (2 bytes) vector of
+ * short values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector {
+ public static final byte TYPE_WIDTH = 2;
+
+ /**
+ * Instantiate a Float2Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.FLOAT2.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a Float2Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Float2Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ }
+
+ @Override
+ protected FieldReader getReaderImpl() {
+ return new Float2ReaderImpl(Float2Vector.this);
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FLOAT2;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public short get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableFloat2Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ @Override
+ public Short getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ static short get(final ArrowBuf buffer, final int index) {
+ return buffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ @Override
+ public double getValueAsDouble(int index) {
+ return getValueAsFloat(index);
+ }
+
+ public float getValueAsFloat(int index) {
+ return Float16.toFloat(this.get(index));
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ private void setValue(int index, short value) {
+ valueBuffer.setShort((long) index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, float value) {
+ valueBuffer.setShort((long) index * TYPE_WIDTH, Float16.toFloat16(value));
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, short value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setWithPossibleTruncate(int index, float value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableFloat2Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, Float2Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, short)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, short value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #setWithPossibleTruncate(int, float)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafeWithPossibleTruncate(int index, float value) {
+ handleSafe(index);
+ setWithPossibleTruncate(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableFloat2Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableFloat2Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, Float2Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, Float2Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, short value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setWithPossibleTruncate(int index, int isSet, float value) {
+ if (isSet > 0) {
+ setWithPossibleTruncate(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, short)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, short value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Same as {@link #set(int, int, short)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafeWithPossibleTruncate(int index, int isSet, float value) {
+ handleSafe(index);
+ setWithPossibleTruncate(index, isSet, value);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, double value) {
+ throw new UnsupportedOperationException("The operation for double data types is not supported.");
+ }
+
+ @Override
+ public void setSafeWithPossibleTruncate(int index, double value) {
+ throw new UnsupportedOperationException("The operation for double data types is not supported.");
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair comprising this and a target vector of
+ * the same type.
+ *
+ * @param field Field object used by the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
+ return new TransferImpl(field, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((Float2Vector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ Float2Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new Float2Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(Field field, BufferAllocator allocator) {
+ to = new Float2Vector(field, allocator);
+ }
+
+ public TransferImpl(Float2Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public Float2Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, Float2Vector.this);
+ }
+ }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
index f29157524f2df..0b0e0d66a98f0 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -18,6 +18,7 @@
package org.apache.arrow.vector.types;
import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
+import static org.apache.arrow.vector.types.FloatingPointPrecision.HALF;
import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
import static org.apache.arrow.vector.types.UnionMode.Dense;
import static org.apache.arrow.vector.types.UnionMode.Sparse;
@@ -33,6 +34,7 @@
import org.apache.arrow.vector.ExtensionTypeVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float2Vector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
@@ -79,6 +81,7 @@
import org.apache.arrow.vector.complex.impl.DenseUnionWriter;
import org.apache.arrow.vector.complex.impl.DurationWriterImpl;
import org.apache.arrow.vector.complex.impl.FixedSizeBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.Float2WriterImpl;
import org.apache.arrow.vector.complex.impl.Float4WriterImpl;
import org.apache.arrow.vector.complex.impl.Float8WriterImpl;
import org.apache.arrow.vector.complex.impl.IntWriterImpl;
@@ -432,6 +435,17 @@ public FieldWriter getNewFieldWriter(ValueVector vector) {
return new IntervalYearWriterImpl((IntervalYearVector) vector);
}
},
+ FLOAT2(new FloatingPoint(HALF)) {
+ @Override
+ public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) {
+ return new Float2Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new Float2WriterImpl((Float2Vector) vector);
+ }
+ },
// 4 byte ieee 754
FLOAT4(new FloatingPoint(SINGLE)) {
@Override
@@ -894,7 +908,7 @@ public MinorType visit(Int type) {
public MinorType visit(FloatingPoint type) {
switch (type.getPrecision()) {
case HALF:
- throw new UnsupportedOperationException("NYI: " + type);
+ return MinorType.FLOAT2;
case SINGLE:
return MinorType.FLOAT4;
case DOUBLE:
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
index 614aff18d4554..10091aebdd50b 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -332,6 +332,204 @@ public void testSizeOfValueBuffer() {
}
}
+ @Test
+ public void testFixedFloat2() {
+ try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1);
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT);
+
+ try {
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ floatVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, floatVector.getValueCapacity());
+
+ /* allocate 32 bytes (16 * 2) */
+ floatVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity);
+ initialCapacity = floatVector.getValueCapacity();
+
+ floatVector.zeroVector();
+
+ /* populate the floatVector */
+ floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f)
+ floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f)
+ floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f)
+ floatVector.set(6, (short) 0x901d); // Float16.toFloat16(-0.000502109527588f)
+ floatVector.set(8, (short) 0x121c); // Float16.toFloat16(+0.00074577331543f)
+ floatVector.set(10, (short) 0x921c); // Float16.toFloat16(-0.00074577331543f)
+ floatVector.set(12, (short) 0x501c); // Float16.toFloat16(+32.875f)
+ floatVector.set(14, (short) 0xd01c); // Float16.toFloat16(-32.875f)
+
+ try {
+ floatVector.set(initialCapacity, (short) 0x141c);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ assertEquals((short) 0x101c, floatVector.get(0));
+ assertEquals((short) 0x901c, floatVector.get(2));
+ assertEquals((short) 0x101d, floatVector.get(4));
+ assertEquals((short) 0x901d, floatVector.get(6));
+ assertEquals((short) 0x121c, floatVector.get(8));
+ assertEquals((short) 0x921c, floatVector.get(10));
+ assertEquals((short) 0x501c, floatVector.get(12));
+ assertEquals((short) 0xd01c, floatVector.get(14));
+
+ try {
+ floatVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ /* this should trigger a realloc() */
+ floatVector.setSafe(initialCapacity, (short) 0x141c); // Float16.toFloat16(+0.00100326538086f)
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ assertEquals((short) 0x101c, floatVector.get(0));
+ assertEquals((short) 0x901c, floatVector.get(2));
+ assertEquals((short) 0x101d, floatVector.get(4));
+ assertEquals((short) 0x901d, floatVector.get(6));
+ assertEquals((short) 0x121c, floatVector.get(8));
+ assertEquals((short) 0x921c, floatVector.get(10));
+ assertEquals((short) 0x501c, floatVector.get(12));
+ assertEquals((short) 0xd01c, floatVector.get(14));
+ assertEquals((short) 0x141c, floatVector.get(initialCapacity));
+
+ /* reset the vector */
+ int capacityBeforeReset = floatVector.getValueCapacity();
+ floatVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, floatVector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i));
+ }
+ }
+ }
+
+ @Test
+ public void testFixedFloat2WithPossibleTruncate() {
+ try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1);
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT);
+
+ try {
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ floatVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, floatVector.getValueCapacity());
+
+ /* allocate 32 bytes (16 * 2) */
+ floatVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity);
+ initialCapacity = floatVector.getValueCapacity();
+
+ floatVector.zeroVector();
+
+ /* populate the floatVector */
+ floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f)
+ floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f)
+ floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f)
+ floatVector.setWithPossibleTruncate(6, 2049.0f); // in f32=2049.000000, out f16=2048
+ floatVector.setWithPossibleTruncate(8, 4098.0f); // in f32=4098.000000, out f16=4096
+ floatVector.setWithPossibleTruncate(10, 8196.0f); // in f32=8196.000000, out f16=8192
+ floatVector.setWithPossibleTruncate(12, 16392.0f); // in f32=16392.000000, out f16=16384
+ floatVector.setWithPossibleTruncate(14, 32784.0f); // in f32=32784.000000, out f16=32768
+
+ try {
+ floatVector.setWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ assertEquals((short) 0x101c, floatVector.get(0));
+ assertEquals((short) 0x901c, floatVector.get(2));
+ assertEquals((short) 0x101d, floatVector.get(4));
+ assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0);
+ assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0);
+ assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0);
+ assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0);
+ assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0);
+
+ try {
+ floatVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ /* this should trigger a realloc() */
+ floatVector.setSafeWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ assertEquals((short) 0x101c, floatVector.get(0));
+ assertEquals((short) 0x901c, floatVector.get(2));
+ assertEquals((short) 0x101d, floatVector.get(4));
+ assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0);
+ assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0);
+ assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0);
+ assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0);
+ assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0);
+ assertEquals(1.6181641f, floatVector.getValueAsDouble(initialCapacity), 0);
+
+ /* reset the vector */
+ int capacityBeforeReset = floatVector.getValueCapacity();
+ floatVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, floatVector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i));
+ }
+ }
+ }
+
@Test /* Float4Vector */
public void testFixedType3() {
try (final Float4Vector floatVector = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) {
From 6ccfeeec3b864671556e50c1ac01e65f47bd06d9 Mon Sep 17 00:00:00 2001
From: mwish
Date: Thu, 1 Feb 2024 21:14:47 +0800
Subject: [PATCH 12/74] GH-39876: [C++] Thirdparty: Bump zlib to 1.3.1 (#39877)
### Rationale for this change
zlib 1.3.1 is the latest release.
### What changes are included in this PR?
Bump zlib to 1.3.1
### Are these changes tested?
Yes, this change is covered by existing tests.
### Are there any user-facing changes?
no
* Closes: #39876
Authored-by: mwish
Signed-off-by: Sutou Kouhei
---
cpp/thirdparty/versions.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 2664775c0fbf4..dd3f5da84f777 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -115,8 +115,8 @@ ARROW_UTF8PROC_BUILD_VERSION=v2.7.0
ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=4bb121e297293c0fd55f08f83afab6d35d48f0af4ecc07523ad8ec99aa2b12a1
ARROW_XSIMD_BUILD_VERSION=9.0.1
ARROW_XSIMD_BUILD_SHA256_CHECKSUM=b1bb5f92167fd3a4f25749db0be7e61ed37e0a5d943490f3accdcd2cd2918cc0
-ARROW_ZLIB_BUILD_VERSION=1.3
-ARROW_ZLIB_BUILD_SHA256_CHECKSUM=ff0ba4c292013dbc27530b3a81e1f9a813cd39de01ca5e0f8bf355702efa593e
+ARROW_ZLIB_BUILD_VERSION=1.3.1
+ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23
ARROW_ZSTD_BUILD_VERSION=1.5.5
ARROW_ZSTD_BUILD_SHA256_CHECKSUM=9c4396cc829cfae319a6e2615202e82aad41372073482fce286fac78646d3ee4
From 2721134715b7dedfa2715bcf47548728ff702d5a Mon Sep 17 00:00:00 2001
From: mwish
Date: Thu, 1 Feb 2024 21:24:42 +0800
Subject: [PATCH 13/74] GH-39845: [C++][Parquet] Minor: avoid creating a new
Reader object in Decoder::SetData (#39847)
### Rationale for this change
Avoid allocating a new BitReader object on every call to Decoder::SetData when one already exists.
### What changes are included in this PR?
In SetData, reuse the existing BitReader via Reset(data, len), allocating a new one only when none exists yet.
### Are these changes tested?
Yes, covered by existing tests.
### Are there any user-facing changes?
no
* Closes: #39845
Authored-by: mwish
Signed-off-by: mwish
---
cpp/src/parquet/encoding.cc | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index b801b5ab11bb9..5573f5b9aed4c 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -2411,7 +2411,11 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
    num_values_ = num_values;
- decoder_ = std::make_shared<::arrow::bit_util::BitReader>(data, len);
+ if (decoder_ == nullptr) {
+ decoder_ = std::make_shared<::arrow::bit_util::BitReader>(data, len);
+ } else {
+ decoder_->Reset(data, len);
+ }
InitHeader();
}
@@ -2769,7 +2773,11 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
void SetData(int num_values, const uint8_t* data, int len) override {
DecoderImpl::SetData(num_values, data, len);
- decoder_ = std::make_shared<::arrow::bit_util::BitReader>(data, len);
+ if (decoder_ == nullptr) {
+ decoder_ = std::make_shared<::arrow::bit_util::BitReader>(data, len);
+ } else {
+ decoder_->Reset(data, len);
+ }
DecodeLengths();
}
From 44d5597a0e8a4d635f1aec82ba885f61b5c17829 Mon Sep 17 00:00:00 2001
From: Alenka Frim
Date: Thu, 1 Feb 2024 14:35:32 +0100
Subject: [PATCH 14/74] GH-39849: [Python] Remove the use of
pytest-lazy-fixture (#39850)
### Rationale for this change
Removing the use of `pytest-lazy-fixture` in our test suite as it is unmaintained.
Changes in this PR include:
- Remove the use of `pytest-lazy-fixture`
- Remove marks from fixtures to avoid future error, see
```
PytestRemovedIn9Warning: Marks applied to fixtures have no effect
See docs: https://docs.pytest.org/en/stable/deprecations.html#applying-a-mark-to-a-fixture-function
```
- Catch two different warnings in `def test_legacy_int_type()`
### Are these changes tested?
The changes affect the tests so they must pass.
### Are there any user-facing changes?
No.
* Closes: #39849
Lead-authored-by: AlenkaF
Co-authored-by: Joris Van den Bossche
Signed-off-by: Joris Van den Bossche
---
ci/conda_env_python.txt | 3 +-
dev/tasks/conda-recipes/arrow-cpp/meta.yaml | 1 -
python/pyarrow/tests/conftest.py | 7 ++---
python/pyarrow/tests/test_dataset.py | 3 --
python/pyarrow/tests/test_extension_type.py | 5 +--
python/pyarrow/tests/test_fs.py | 34 ++++++++++-----------
python/pyarrow/tests/test_ipc.py | 6 ++--
python/requirements-test.txt | 1 -
python/requirements-wheel-test.txt | 1 -
9 files changed, 25 insertions(+), 36 deletions(-)
diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt
index 5fdd21d2bd1f9..59e2def1bf339 100644
--- a/ci/conda_env_python.txt
+++ b/ci/conda_env_python.txt
@@ -23,9 +23,8 @@ cloudpickle
fsspec
hypothesis
numpy>=1.16.6
-pytest<8 # pytest-lazy-fixture broken on pytest 8.0.0
+pytest<8
pytest-faulthandler
-pytest-lazy-fixture
s3fs>=2023.10.0
setuptools
setuptools_scm<8.0.0
diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
index b8ffbfdb715b6..367445c595c4b 100644
--- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
+++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
@@ -340,7 +340,6 @@ outputs:
# test_cpp_extension_in_python requires a compiler
- {{ compiler("cxx") }} # [linux]
- pytest
- - pytest-lazy-fixture
- backports.zoneinfo # [py<39]
- boto3
- cffi
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index a5941e8c8d1a8..0da757a4bc56e 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -24,7 +24,6 @@
import urllib.request
import pytest
-from pytest_lazyfixture import lazy_fixture
import hypothesis as h
from ..conftest import groups, defaults
@@ -259,13 +258,13 @@ def gcs_server():
@pytest.fixture(
params=[
- lazy_fixture('builtin_pickle'),
- lazy_fixture('cloudpickle')
+ 'builtin_pickle',
+ 'cloudpickle'
],
scope='session'
)
def pickle_module(request):
- return request.param
+ return request.getfixturevalue(request.param)
@pytest.fixture(scope='session')
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index a4838d63a6b0b..a9054f0b174aa 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -100,7 +100,6 @@ def assert_dataset_fragment_convenience_methods(dataset):
@pytest.fixture
-@pytest.mark.parquet
def mockfs():
mockfs = fs._MockFileSystem()
@@ -221,7 +220,6 @@ def multisourcefs(request):
@pytest.fixture
-@pytest.mark.parquet
def dataset(mockfs):
format = ds.ParquetFileFormat()
selector = fs.FileSelector('subdir', recursive=True)
@@ -2692,7 +2690,6 @@ def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module)
@pytest.fixture
-@pytest.mark.parquet
def s3_example_simple(s3_server):
from pyarrow.fs import FileSystem
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index a88e20eefe098..d8c792ef00c6b 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -1485,10 +1485,7 @@ def test_legacy_int_type():
batch = pa.RecordBatch.from_arrays([ext_arr], names=['ext'])
buf = ipc_write_batch(batch)
- with pytest.warns(
- RuntimeWarning,
- match="pickle-based deserialization of pyarrow.PyExtensionType "
- "subclasses is disabled by default"):
+ with pytest.warns((RuntimeWarning, FutureWarning)):
batch = ipc_read_batch(buf)
assert isinstance(batch.column(0).type, pa.UnknownExtensionType)
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index d0fa253e314e9..ab10addfc3d4c 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -362,79 +362,79 @@ def py_fsspec_s3fs(request, s3_server):
@pytest.fixture(params=[
pytest.param(
- pytest.lazy_fixture('localfs'),
+ 'localfs',
id='LocalFileSystem()'
),
pytest.param(
- pytest.lazy_fixture('localfs_with_mmap'),
+ 'localfs_with_mmap',
id='LocalFileSystem(use_mmap=True)'
),
pytest.param(
- pytest.lazy_fixture('subtree_localfs'),
+ 'subtree_localfs',
id='SubTreeFileSystem(LocalFileSystem())'
),
pytest.param(
- pytest.lazy_fixture('s3fs'),
+ 's3fs',
id='S3FileSystem',
marks=pytest.mark.s3
),
pytest.param(
- pytest.lazy_fixture('gcsfs'),
+ 'gcsfs',
id='GcsFileSystem',
marks=pytest.mark.gcs
),
pytest.param(
- pytest.lazy_fixture('hdfs'),
+ 'hdfs',
id='HadoopFileSystem',
marks=pytest.mark.hdfs
),
pytest.param(
- pytest.lazy_fixture('mockfs'),
+ 'mockfs',
id='_MockFileSystem()'
),
pytest.param(
- pytest.lazy_fixture('py_localfs'),
+ 'py_localfs',
id='PyFileSystem(ProxyHandler(LocalFileSystem()))'
),
pytest.param(
- pytest.lazy_fixture('py_mockfs'),
+ 'py_mockfs',
id='PyFileSystem(ProxyHandler(_MockFileSystem()))'
),
pytest.param(
- pytest.lazy_fixture('py_fsspec_localfs'),
+ 'py_fsspec_localfs',
id='PyFileSystem(FSSpecHandler(fsspec.LocalFileSystem()))'
),
pytest.param(
- pytest.lazy_fixture('py_fsspec_memoryfs'),
+ 'py_fsspec_memoryfs',
id='PyFileSystem(FSSpecHandler(fsspec.filesystem("memory")))'
),
pytest.param(
- pytest.lazy_fixture('py_fsspec_s3fs'),
+ 'py_fsspec_s3fs',
id='PyFileSystem(FSSpecHandler(s3fs.S3FileSystem()))',
marks=pytest.mark.s3
),
])
def filesystem_config(request):
- return request.param
+ return request.getfixturevalue(request.param)
@pytest.fixture
-def fs(request, filesystem_config):
+def fs(filesystem_config):
return filesystem_config['fs']
@pytest.fixture
-def pathfn(request, filesystem_config):
+def pathfn(filesystem_config):
return filesystem_config['pathfn']
@pytest.fixture
-def allow_move_dir(request, filesystem_config):
+def allow_move_dir(filesystem_config):
return filesystem_config['allow_move_dir']
@pytest.fixture
-def allow_append_to_file(request, filesystem_config):
+def allow_append_to_file(filesystem_config):
return filesystem_config['allow_append_to_file']
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index f75ec8158a9da..407011d90b734 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -142,16 +142,16 @@ def stream_fixture():
@pytest.fixture(params=[
pytest.param(
- pytest.lazy_fixture('file_fixture'),
+ 'file_fixture',
id='File Format'
),
pytest.param(
- pytest.lazy_fixture('stream_fixture'),
+ 'stream_fixture',
id='Stream Format'
)
])
def format_fixture(request):
- return request.param
+ return request.getfixturevalue(request.param)
def test_empty_file():
diff --git a/python/requirements-test.txt b/python/requirements-test.txt
index b3ba5d852b968..2108d70a543f5 100644
--- a/python/requirements-test.txt
+++ b/python/requirements-test.txt
@@ -2,5 +2,4 @@ cffi
hypothesis
pandas
pytest<8
-pytest-lazy-fixture
pytz
diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt
index c74a8ca6908a7..a1046bc18c704 100644
--- a/python/requirements-wheel-test.txt
+++ b/python/requirements-wheel-test.txt
@@ -2,7 +2,6 @@ cffi
cython
hypothesis
pytest<8
-pytest-lazy-fixture
pytz
tzdata; sys_platform == 'win32'
From 3d45ac96534fc76b820b488aa02182e6b93a388f Mon Sep 17 00:00:00 2001
From: "y.yoshida5" <39612448+yo1956@users.noreply.github.com>
Date: Thu, 1 Feb 2024 22:36:59 +0900
Subject: [PATCH 15/74] GH-39779: [Python] Expose force_virtual_addressing in
PyArrow (#39819)
### Rationale for this change / What changes are included in this PR?
To expose force_virtual_addressing in PyArrow.
### Are these changes tested?
Existing unit tests are not broken, and a new test case has been added.
### Are there any user-facing changes?
pyarrow.fs.S3FileSystem: it becomes possible to specify the argument 'force_virtual_addressing'.
* Closes: #39779
Authored-by: yo1956
Signed-off-by: Joris Van den Bossche
---
python/pyarrow/_s3fs.pyx | 11 ++++++++++-
python/pyarrow/includes/libarrow_fs.pxd | 1 +
python/pyarrow/tests/test_fs.py | 4 ++++
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index 13b8c748cb8ca..f5bab99a49f7a 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -245,6 +245,11 @@ cdef class S3FileSystem(FileSystem):
retry_strategy : S3RetryStrategy, default AwsStandardS3RetryStrategy(max_attempts=3)
The retry strategy to use with S3; fail after max_attempts. Available
strategies are AwsStandardS3RetryStrategy, AwsDefaultS3RetryStrategy.
+ force_virtual_addressing : bool, default False
+ Whether to use virtual addressing of buckets.
+ If true, then virtual addressing is always enabled.
+ If false, then virtual addressing is only enabled if `endpoint_override` is empty.
+ This can be used for non-AWS backends that only support virtual hosted-style access.
Examples
--------
@@ -268,7 +273,9 @@ cdef class S3FileSystem(FileSystem):
role_arn=None, session_name=None, external_id=None,
load_frequency=900, proxy_options=None,
allow_bucket_creation=False, allow_bucket_deletion=False,
- retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3)):
+ retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(
+ max_attempts=3),
+ force_virtual_addressing=False):
cdef:
optional[CS3Options] options
shared_ptr[CS3FileSystem] wrapped
@@ -380,6 +387,7 @@ cdef class S3FileSystem(FileSystem):
options.value().allow_bucket_creation = allow_bucket_creation
options.value().allow_bucket_deletion = allow_bucket_deletion
+ options.value().force_virtual_addressing = force_virtual_addressing
if isinstance(retry_strategy, AwsStandardS3RetryStrategy):
options.value().retry_strategy = CS3RetryStrategy.GetAwsStandardRetryStrategy(
@@ -447,6 +455,7 @@ cdef class S3FileSystem(FileSystem):
opts.proxy_options.username),
'password': frombytes(
opts.proxy_options.password)},
+ force_virtual_addressing=opts.force_virtual_addressing,
),)
)
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index cb30f4e750eff..7876fb0f96671 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -167,6 +167,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
c_bool background_writes
c_bool allow_bucket_creation
c_bool allow_bucket_deletion
+ c_bool force_virtual_addressing
shared_ptr[const CKeyValueMetadata] default_metadata
c_string role_arn
c_string session_name
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index ab10addfc3d4c..6ba5137e4f63e 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1186,6 +1186,10 @@ def test_s3_options(pickle_module):
assert pickle_module.loads(pickle_module.dumps(fs2)) == fs2
assert fs2 != fs
+ fs = S3FileSystem(endpoint_override='localhost:8999', force_virtual_addressing=True)
+ assert isinstance(fs, S3FileSystem)
+ assert pickle_module.loads(pickle_module.dumps(fs)) == fs
+
with pytest.raises(ValueError):
S3FileSystem(access_key='access')
with pytest.raises(ValueError):
From a1c1773b724e4d78faf9a097247c7e976cd2cbfa Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Thu, 1 Feb 2024 14:53:35 +0100
Subject: [PATCH 16/74] GH-39555: [Packaging][Python] Enable building pyarrow
against numpy 2.0 (#39557)
### Rationale for this change
Ensure we can build pyarrow against numpy 2.0 nightly (update pyproject.toml to allow this), and test this by building our nightly wheels with numpy nightly. This also ensures that other projects that use our nightly wheels to test together with numpy nightly can do that (numpy 2.0 changes the ABI, so to run with numpy 2.0, your package needs to be built with numpy 2.x; currently pyarrow installed with our nightly wheel will fail to import when also numpy nightly is installed).
See the parent issue https://github.com/apache/arrow/issues/39532 for details, and https://numpy.org/devdocs/dev/depending_on_numpy.html#numpy-2-0-specific-advice for a direct link to the NumPy guidelines on updating build dependencies for NumPy 2.0.
* Closes: #39555
Lead-authored-by: Joris Van den Bossche
Co-authored-by: Antoine Pitrou
Signed-off-by: Joris Van den Bossche
---
ci/docker/python-wheel-manylinux.dockerfile | 5 +++--
ci/docker/python-wheel-windows-vs2017.dockerfile | 3 ++-
ci/scripts/python_wheel_macos_build.sh | 5 ++++-
python/pyproject.toml | 7 ++++++-
python/requirements-build.txt | 3 ++-
python/requirements-wheel-build.txt | 3 ++-
python/setup.py | 2 +-
7 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile
index 0a50d450c225a..a07c727ac76fa 100644
--- a/ci/docker/python-wheel-manylinux.dockerfile
+++ b/ci/docker/python-wheel-manylinux.dockerfile
@@ -28,7 +28,7 @@ ENV MANYLINUX_VERSION=${manylinux}
RUN yum install -y dnf
# Install basic dependencies
-RUN dnf install -y git flex curl autoconf zip perl-IPC-Cmd wget kernel-headers
+RUN dnf install -y git flex curl autoconf zip perl-IPC-Cmd wget
# A system Python is required for ninja and vcpkg in this Dockerfile.
# On manylinux2014 base images, system Python is 2.7.5, while
@@ -97,4 +97,5 @@ SHELL ["/bin/bash", "-i", "-c"]
ENTRYPOINT ["/bin/bash", "-i", "-c"]
COPY python/requirements-wheel-build.txt /arrow/python/
-RUN pip install -r /arrow/python/requirements-wheel-build.txt
+# TODO(GH-39848) Remove the `--pre --extra-index-url` for numpy nightly again before the 16.0 release
+RUN pip install -r /arrow/python/requirements-wheel-build.txt --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
diff --git a/ci/docker/python-wheel-windows-vs2017.dockerfile b/ci/docker/python-wheel-windows-vs2017.dockerfile
index faf07800c956a..067105b3a7995 100644
--- a/ci/docker/python-wheel-windows-vs2017.dockerfile
+++ b/ci/docker/python-wheel-windows-vs2017.dockerfile
@@ -88,7 +88,8 @@ RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION%
RUN python -m pip install -U pip setuptools
COPY python/requirements-wheel-build.txt arrow/python/
-RUN python -m pip install -r arrow/python/requirements-wheel-build.txt
+# TODO(GH-39848) Remove the `--pre --extra-index-url` for numpy nightly again before the 16.0 release
+RUN python -m pip install -r arrow/python/requirements-wheel-build.txt --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
# ENV CLCACHE_DIR="C:\clcache"
# ENV CLCACHE_COMPRESS=1
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index fd845c512dcdb..8123a9fdf1c48 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -50,12 +50,15 @@ echo "=== (${PYTHON_VERSION}) Install Python build dependencies ==="
export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}"
+# TODO(GH-39848) Remove the `--pre --extra-index-url` for numpy nightly again before the 16.0 release
pip install \
--upgrade \
--only-binary=:all: \
--target $PIP_SITE_PACKAGES \
--platform $PIP_TARGET_PLATFORM \
- -r ${source_dir}/python/requirements-wheel-build.txt
+ -r ${source_dir}/python/requirements-wheel-build.txt \
+ --pre \
+ --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
pip install "delocate>=0.10.3"
echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 437de105ab8e7..9079618ad1c7d 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -18,7 +18,12 @@
[build-system]
requires = [
"cython >= 0.29.31",
- "oldest-supported-numpy>=0.14",
+ # Starting with NumPy 1.25, NumPy is (by default) as far back compatible
+ # as oldest-support-numpy was (customizable with a NPY_TARGET_VERSION
+ # define). For older Python versions (where NumPy 1.25 is not yet avaiable)
+ # continue using oldest-support-numpy.
+ "oldest-supported-numpy>=0.14; python_version<'3.9'",
+ "numpy>=1.25; python_version>='3.9'",
"setuptools_scm < 8.0.0",
"setuptools >= 40.1.0",
"wheel"
diff --git a/python/requirements-build.txt b/python/requirements-build.txt
index 56e9d479ee9ba..e1372e807f88d 100644
--- a/python/requirements-build.txt
+++ b/python/requirements-build.txt
@@ -1,4 +1,5 @@
cython>=0.29.31
-oldest-supported-numpy>=0.14
+oldest-supported-numpy>=0.14; python_version<'3.9'
+numpy>=1.25; python_version>='3.9'
setuptools_scm<8.0.0
setuptools>=38.6.0
diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt
index f42ee4a018f3c..044f9de5f8214 100644
--- a/python/requirements-wheel-build.txt
+++ b/python/requirements-wheel-build.txt
@@ -1,5 +1,6 @@
cython>=0.29.31
-oldest-supported-numpy>=0.14
+oldest-supported-numpy>=0.14; python_version<'3.9'
+numpy>=1.25; python_version>='3.9'
setuptools_scm<8.0.0
setuptools>=58
wheel
diff --git a/python/setup.py b/python/setup.py
index d7a2da2077cdd..098d75a3186af 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -449,7 +449,7 @@ def has_ext_modules(foo):
install_requires = (
- 'numpy >= 1.16.6, <2',
+ 'numpy >= 1.16.6',
)
From 4ceb66101382d74c6ef73ff546fad10183ab58d8 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Thu, 1 Feb 2024 14:54:14 +0100
Subject: [PATCH 17/74] GH-39880: [Python][CI] Pin moto<5 for dask integration
tests (#39881)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
See upstream pin being added (https://github.com/dask/dask/pull/10868 / https://github.com/dask/dask/issues/10869), we are seeing the same failures
* Closes: #39880
Lead-authored-by: Joris Van den Bossche
Co-authored-by: Raúl Cumplido
Signed-off-by: Joris Van den Bossche
---
ci/scripts/install_dask.sh | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ci/scripts/install_dask.sh b/ci/scripts/install_dask.sh
index 8d712a88a6ab1..478c1d5997906 100755
--- a/ci/scripts/install_dask.sh
+++ b/ci/scripts/install_dask.sh
@@ -35,4 +35,5 @@ else
fi
# additional dependencies needed for dask's s3 tests
-pip install moto[server] flask requests
+# Moto 5 results in timeouts in s3 tests: https://github.com/dask/dask/issues/10869
+pip install "moto[server]<5" flask requests
From b684028dfbeeed85d132a1249449a85877d796b1 Mon Sep 17 00:00:00 2001
From: Jonathan Keane
Date: Thu, 1 Feb 2024 08:16:56 -0600
Subject: [PATCH 18/74] GH-39859: [R] Remove macOS from the allow list (#39861)
Originally this was going to also bundle all of our dependencies to send to CRAN, but their webforms don't allow source tars that large (I tried down to 80MB which removed a large number of our dependencies, and that was still rejected by the macbuilder).
This means that on CRAN, if there is no internet, the macOS binary will be minimal. But it means that on CRAN we always build from source.
We should definitely submit this to macbuilder after this merges to main and confirm we get source build by default (since we look to the repo for our allowlist)
* Closes: #39859
Authored-by: Jonathan Keane
Signed-off-by: Jonathan Keane
---
r/tools/nixlibs-allowlist.txt | 1 -
r/tools/nixlibs.R | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/r/tools/nixlibs-allowlist.txt b/r/tools/nixlibs-allowlist.txt
index 9c368e6ed15a2..bd9f0c1b2c084 100644
--- a/r/tools/nixlibs-allowlist.txt
+++ b/r/tools/nixlibs-allowlist.txt
@@ -2,4 +2,3 @@ ubuntu
centos
redhat
rhel
-darwin
diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R
index 17c6ab0a8078b..0af41888b95b7 100644
--- a/r/tools/nixlibs.R
+++ b/r/tools/nixlibs.R
@@ -222,7 +222,7 @@ check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apa
# Try a remote allowlist so that we can add/remove without a release
suppressWarnings(readLines(allowed)),
# Fallback to default: allowed only on Ubuntu and CentOS/RHEL
- error = function(e) c("ubuntu", "centos", "redhat", "rhel", "darwin")
+ error = function(e) c("ubuntu", "centos", "redhat", "rhel")
)
# allowlist should contain valid regular expressions (plain strings ok too)
any(grepl(paste(allowlist, collapse = "|"), os))
From 63c7c4a327ff5b27a1ba6838253408e965c0a348 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Thu, 1 Feb 2024 23:43:21 +0900
Subject: [PATCH 19/74] GH-39874: [CI][C++][Windows] Use pre-installed OpenSSL
(#39882)
### Rationale for this change
It seems that we can't use OpenSSL via Chocolatey.
```text
openssl v3.2.0 [Approved]
openssl package files install completed. Performing other installation steps.
Attempt to get headers for https://slproweb.com/download/Win64OpenSSL-3_2_0.exe failed.
The remote file either doesn't exist, is unauthorized, or is forbidden for url 'https://slproweb.com/download/Win64OpenSSL-3_2_0.exe'. Exception calling "GetResponse" with "0" argument(s): "The remote server returned an error: (404) Not Found."
Downloading openssl 64 bit
from 'https://slproweb.com/download/Win64OpenSSL-3_2_0.exe'
ERROR: The remote file either doesn't exist, is unauthorized, or is forbidden for url 'https://slproweb.com/download/Win64OpenSSL-3_2_0.exe'. Exception calling "GetResponse" with "0" argument(s): "The remote server returned an error: (404) Not Found."
This package is likely not broken for licensed users - see https://docs.chocolatey.org/en-us/features/private-cdn.
The install of openssl was NOT successful.
Error while running 'C:\ProgramData\chocolatey\lib\openssl\tools\chocolateyinstall.ps1'.
See log for details.
```
### What changes are included in this PR?
Use pre-installed OpenSSL on self-hosted GitHub runner instead.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* Closes: #39874
Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
.github/workflows/cpp.yml | 4 ----
1 file changed, 4 deletions(-)
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index bd14f1b895bf6..9fbad06692bd2 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -284,10 +284,6 @@ jobs:
/t REG_DWORD `
/d 1 `
/f
- - name: Installed Packages
- run: choco list
- - name: Install Dependencies
- run: choco install -y --no-progress openssl
- name: Checkout Arrow
uses: actions/checkout@v4
with:
From c534749b3230f4ad640fe568d603c665b4bcee3d Mon Sep 17 00:00:00 2001
From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com>
Date: Thu, 1 Feb 2024 10:21:12 -0500
Subject: [PATCH 20/74] GH-39885: [CI][MATLAB] Bump matlab-actions/setup-matlab
and matlab-actions/run-tests from v1 to v2 (#39886)
### Rationale for this change
Upgrading our CI workflows to use the latest versions of [matlab-actions/setup-matlab](https://github.com/matlab-actions/setup-matlab/) and [matlab-actions/run-tests](https://github.com/matlab-actions/run-tests/).
### What changes are included in this PR?
1. Bumped version of `matlab-actions/setup-matlab` from `v1` to `v2`
2. Bumped version of `matlab-actions/run-tests` from `v1` to `v2`
### Are these changes tested?
All MATLAB workflow checks passed.
### Are there any user-facing changes?
No.
* Closes: #39885
Authored-by: Sarah Gilmore
Signed-off-by: Sutou Kouhei
---
.github/workflows/matlab.yml | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml
index 512ff2bb929b3..eceeb551a0653 100644
--- a/.github/workflows/matlab.yml
+++ b/.github/workflows/matlab.yml
@@ -52,7 +52,7 @@ jobs:
- name: Install ninja-build
run: sudo apt-get install ninja-build
- name: Install MATLAB
- uses: matlab-actions/setup-matlab@v1
+ uses: matlab-actions/setup-matlab@v2
with:
release: R2023a
- name: Install ccache
@@ -85,7 +85,7 @@ jobs:
# Add the installation directory to the MATLAB Search Path by
# setting the MATLABPATH environment variable.
MATLABPATH: matlab/install/arrow_matlab
- uses: matlab-actions/run-tests@v1
+ uses: matlab-actions/run-tests@v2
with:
select-by-folder: matlab/test
macos:
@@ -100,7 +100,7 @@ jobs:
- name: Install ninja-build
run: brew install ninja
- name: Install MATLAB
- uses: matlab-actions/setup-matlab@v1
+ uses: matlab-actions/setup-matlab@v2
with:
release: R2023a
- name: Install ccache
@@ -125,7 +125,7 @@ jobs:
# Add the installation directory to the MATLAB Search Path by
# setting the MATLABPATH environment variable.
MATLABPATH: matlab/install/arrow_matlab
- uses: matlab-actions/run-tests@v1
+ uses: matlab-actions/run-tests@v2
with:
select-by-folder: matlab/test
windows:
@@ -138,7 +138,7 @@ jobs:
with:
fetch-depth: 0
- name: Install MATLAB
- uses: matlab-actions/setup-matlab@v1
+ uses: matlab-actions/setup-matlab@v2
with:
release: R2023a
- name: Download Timezone Database
@@ -171,6 +171,6 @@ jobs:
# Add the installation directory to the MATLAB Search Path by
# setting the MATLABPATH environment variable.
MATLABPATH: matlab/install/arrow_matlab
- uses: matlab-actions/run-tests@v1
+ uses: matlab-actions/run-tests@v2
with:
select-by-folder: matlab/test
From 87b515e9207509aa3f77e3e1c0122be314a77e6d Mon Sep 17 00:00:00 2001
From: Matt Topol
Date: Thu, 1 Feb 2024 11:48:29 -0500
Subject: [PATCH 21/74] GH-39771: [C++][Device] Generic CopyBatchTo/CopyArrayTo
memory types (#39772)
### Rationale for this change
Right now our MemoryManager interfaces operate solely at the buffer level and we do not provide any higher level facilities to copy an entire array or record batch between memory types. We should implement CopyArrayTo and CopyBatchTo functions which recursively utilize the buffer level copying to create a new Array/RecordBatch whose buffers have been copied to the destination memory manager.
### What changes are included in this PR?
Exposing a `CopyArrayTo` and `CopyBatchTo` function for copying entire Array or RecordBatches between memory types.
### Are these changes tested?
Tests are still being written but will be added.
* Closes: #39771
Authored-by: Matt Topol
Signed-off-by: Matt Topol
---
cpp/src/arrow/array/array_base.cc | 12 +++++++++
cpp/src/arrow/array/array_base.h | 16 ++++++++++++
cpp/src/arrow/array/data.cc | 39 ++++++++++++++++++++++++++++
cpp/src/arrow/array/data.h | 19 +++++++++++---
cpp/src/arrow/buffer.h | 2 +-
cpp/src/arrow/c/bridge.cc | 2 +-
cpp/src/arrow/c/bridge_test.cc | 4 ++-
cpp/src/arrow/device.cc | 2 ++
cpp/src/arrow/gpu/cuda_context.cc | 5 ++++
cpp/src/arrow/ipc/read_write_test.cc | 27 +++----------------
cpp/src/arrow/record_batch.cc | 24 +++++++++++++++++
cpp/src/arrow/record_batch.h | 19 ++++++++++++++
12 files changed, 142 insertions(+), 29 deletions(-)
diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc
index b483ec420cc3c..6927f51283eb7 100644
--- a/cpp/src/arrow/array/array_base.cc
+++ b/cpp/src/arrow/array/array_base.cc
@@ -307,6 +307,18 @@ Result> Array::View(
return MakeArray(result);
}
+Result> Array::CopyTo(
+ const std::shared_ptr& to) const {
+ ARROW_ASSIGN_OR_RAISE(auto copied_data, data()->CopyTo(to));
+ return MakeArray(copied_data);
+}
+
+Result> Array::ViewOrCopyTo(
+ const std::shared_ptr& to) const {
+ ARROW_ASSIGN_OR_RAISE(auto new_data, data()->ViewOrCopyTo(to));
+ return MakeArray(new_data);
+}
+
// ----------------------------------------------------------------------
// NullArray
diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h
index 7e857bf20568e..6411aebf80442 100644
--- a/cpp/src/arrow/array/array_base.h
+++ b/cpp/src/arrow/array/array_base.h
@@ -165,6 +165,22 @@ class ARROW_EXPORT Array {
/// An error is returned if the types are not layout-compatible.
Result> View(const std::shared_ptr& type) const;
+ /// \brief Construct a copy of the array with all buffers on destination
+ /// Memory Manager
+ ///
+ /// This method recursively copies the array's buffers and those of its children
+ /// onto the destination MemoryManager device and returns the new Array.
+ Result> CopyTo(const std::shared_ptr& to) const;
+
+ /// \brief Construct a new array attempting to zero-copy view if possible.
+ ///
+ /// Like CopyTo this method recursively goes through all of the array's buffers
+ /// and those of it's children and first attempts to create zero-copy
+ /// views on the destination MemoryManager device. If it can't, it falls back
+ /// to performing a copy. See Buffer::ViewOrCopy.
+ Result> ViewOrCopyTo(
+ const std::shared_ptr& to) const;
+
/// Construct a zero-copy slice of the array with the indicated offset and
/// length
///
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index 8454ac8f1d5fb..80c411dfa6a6d 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -27,6 +27,7 @@
#include "arrow/array/util.h"
#include "arrow/buffer.h"
+#include "arrow/device.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/type.h"
@@ -36,6 +37,7 @@
#include "arrow/util/dict_util.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
+#include "arrow/util/range.h"
#include "arrow/util/ree_util.h"
#include "arrow/util/slice_util_internal.h"
#include "arrow/util/union_util.h"
@@ -140,6 +142,43 @@ std::shared_ptr ArrayData::Make(std::shared_ptr type, int64
return std::make_shared(std::move(type), length, null_count, offset);
}
+namespace {
+template
+Result> CopyToImpl(const ArrayData& data,
+ const std::shared_ptr& to,
+ Fn&& copy_fn) {
+ auto output = ArrayData::Make(data.type, data.length, data.null_count, data.offset);
+ output->buffers.resize(data.buffers.size());
+ for (auto&& [buf, out_buf] : internal::Zip(data.buffers, output->buffers)) {
+ if (buf) {
+ ARROW_ASSIGN_OR_RAISE(out_buf, copy_fn(buf, to));
+ }
+ }
+
+ output->child_data.reserve(data.child_data.size());
+ for (const auto& child : data.child_data) {
+ ARROW_ASSIGN_OR_RAISE(auto copied, CopyToImpl(*child, to, copy_fn));
+ output->child_data.push_back(std::move(copied));
+ }
+
+ if (data.dictionary) {
+ ARROW_ASSIGN_OR_RAISE(output->dictionary, CopyToImpl(*data.dictionary, to, copy_fn));
+ }
+
+ return output;
+}
+} // namespace
+
+Result> ArrayData::CopyTo(
+ const std::shared_ptr& to) const {
+ return CopyToImpl(*this, to, MemoryManager::CopyBuffer);
+}
+
+Result> ArrayData::ViewOrCopyTo(
+ const std::shared_ptr& to) const {
+ return CopyToImpl(*this, to, Buffer::ViewOrCopy);
+}
+
std::shared_ptr ArrayData::Slice(int64_t off, int64_t len) const {
ARROW_CHECK_LE(off, length) << "Slice offset (" << off
<< ") greater than array length (" << length << ")";
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index edd443adc43c4..d8a6663cec580 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -27,6 +27,7 @@
#include "arrow/buffer.h"
#include "arrow/result.h"
#include "arrow/type.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
#include "arrow/util/span.h"
@@ -34,9 +35,6 @@
namespace arrow {
-class Array;
-struct ArrayData;
-
namespace internal {
// ----------------------------------------------------------------------
// Null handling for types without a validity bitmap and the dictionary type
@@ -183,6 +181,21 @@ struct ARROW_EXPORT ArrayData {
std::shared_ptr Copy() const { return std::make_shared(*this); }
+ /// \brief Copy all buffers and children recursively to destination MemoryManager
+ ///
+ /// This utilizes MemoryManager::CopyBuffer to create a new ArrayData object
+ /// recursively copying the buffers and all child buffers to the destination
+ /// memory manager. This includes dictionaries if applicable.
+ Result> CopyTo(
+ const std::shared_ptr& to) const;
+ /// \brief View or Copy this ArrayData to destination memory manager.
+ ///
+ /// Tries to view the buffer contents on the given memory manager's device
+ /// if possible (to avoid a copy) but falls back to copying if a no-copy view
+ /// isn't supported.
+ Result> ViewOrCopyTo(
+ const std::shared_ptr& to) const;
+
bool IsNull(int64_t i) const { return !IsValid(i); }
bool IsValid(int64_t i) const {
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 52fd94ec1f7d4..258a9faac7361 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -359,7 +359,7 @@ class ARROW_EXPORT Buffer {
static Result> ViewOrCopy(
std::shared_ptr source, const std::shared_ptr& to);
- virtual std::shared_ptr device_sync_event() { return NULLPTR; }
+ virtual std::shared_ptr device_sync_event() const { return NULLPTR; }
protected:
bool is_mutable_;
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 238afb0328672..172ed8962ce77 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -1466,7 +1466,7 @@ class ImportedBuffer : public Buffer {
~ImportedBuffer() override = default;
- std::shared_ptr device_sync_event() override {
+ std::shared_ptr device_sync_event() const override {
return import_->device_sync_;
}
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 58bbc9282c204..321ec36c38d8c 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -1282,7 +1282,9 @@ class MyBuffer final : public MutableBuffer {
default_memory_pool()->Free(const_cast(data_), size_);
}
- std::shared_ptr device_sync_event() override { return device_sync_; }
+ std::shared_ptr device_sync_event() const override {
+ return device_sync_;
+ }
protected:
std::shared_ptr device_sync_;
diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc
index de709923dc44e..616f89aae896f 100644
--- a/cpp/src/arrow/device.cc
+++ b/cpp/src/arrow/device.cc
@@ -20,8 +20,10 @@
#include
#include
+#include "arrow/array.h"
#include "arrow/buffer.h"
#include "arrow/io/memory.h"
+#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/util/logging.h"
diff --git a/cpp/src/arrow/gpu/cuda_context.cc b/cpp/src/arrow/gpu/cuda_context.cc
index 81542d339bd70..988cc1f25b91c 100644
--- a/cpp/src/arrow/gpu/cuda_context.cc
+++ b/cpp/src/arrow/gpu/cuda_context.cc
@@ -433,6 +433,11 @@ Result> CudaMemoryManager::CopyBufferTo(
Result> CudaMemoryManager::CopyNonOwnedTo(
const Buffer& buf, const std::shared_ptr& to) {
if (to->is_cpu()) {
+ auto sync_event = buf.device_sync_event();
+ if (sync_event) {
+ RETURN_NOT_OK(sync_event->Wait());
+ }
+
// Device-to-CPU copy
std::unique_ptr dest;
ARROW_ASSIGN_OR_RAISE(auto from_context, cuda_device()->GetContext());
diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc
index bd2c2b716d502..c5075299a3e35 100644
--- a/cpp/src/arrow/ipc/read_write_test.cc
+++ b/cpp/src/arrow/ipc/read_write_test.cc
@@ -1336,30 +1336,11 @@ class CopyCollectListener : public CollectListener {
Status OnRecordBatchWithMetadataDecoded(
RecordBatchWithMetadata record_batch_with_metadata) override {
- auto& record_batch = record_batch_with_metadata.batch;
- for (auto column_data : record_batch->column_data()) {
- ARROW_RETURN_NOT_OK(CopyArrayData(column_data));
- }
- return CollectListener::OnRecordBatchWithMetadataDecoded(record_batch_with_metadata);
- }
+ ARROW_ASSIGN_OR_RAISE(
+ record_batch_with_metadata.batch,
+ record_batch_with_metadata.batch->CopyTo(default_cpu_memory_manager()));
- private:
- Status CopyArrayData(std::shared_ptr data) {
- auto& buffers = data->buffers;
- for (size_t i = 0; i < buffers.size(); ++i) {
- auto& buffer = buffers[i];
- if (!buffer) {
- continue;
- }
- ARROW_ASSIGN_OR_RAISE(buffers[i], Buffer::Copy(buffer, buffer->memory_manager()));
- }
- for (auto child_data : data->child_data) {
- ARROW_RETURN_NOT_OK(CopyArrayData(child_data));
- }
- if (data->dictionary) {
- ARROW_RETURN_NOT_OK(CopyArrayData(data->dictionary));
- }
- return Status::OK();
+ return CollectListener::OnRecordBatchWithMetadataDecoded(record_batch_with_metadata);
}
};
diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index 457135fa400d5..ca6b45af3d6b4 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -357,6 +357,30 @@ Status ValidateBatch(const RecordBatch& batch, bool full_validation) {
} // namespace
+Result> RecordBatch::CopyTo(
+ const std::shared_ptr& to) const {
+ ArrayVector copied_columns;
+ copied_columns.reserve(num_columns());
+ for (const auto& col : columns()) {
+ ARROW_ASSIGN_OR_RAISE(auto c, col->CopyTo(to));
+ copied_columns.push_back(std::move(c));
+ }
+
+ return Make(schema_, num_rows(), std::move(copied_columns));
+}
+
+Result> RecordBatch::ViewOrCopyTo(
+ const std::shared_ptr& to) const {
+ ArrayVector copied_columns;
+ copied_columns.reserve(num_columns());
+ for (const auto& col : columns()) {
+ ARROW_ASSIGN_OR_RAISE(auto c, col->ViewOrCopyTo(to));
+ copied_columns.push_back(std::move(c));
+ }
+
+ return Make(schema_, num_rows(), std::move(copied_columns));
+}
+
Status RecordBatch::Validate() const {
return ValidateBatch(*this, /*full_validation=*/false);
}
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index 1a66fc3fb5629..79f93a7b5997f 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -186,6 +186,25 @@ class ARROW_EXPORT RecordBatch {
/// \return the number of rows (the corresponding length of each column)
int64_t num_rows() const { return num_rows_; }
+ /// \brief Copy the entire RecordBatch to destination MemoryManager
+ ///
+ /// This uses Array::CopyTo on each column of the record batch to create
+ /// a new record batch where all underlying buffers for the columns have
+ /// been copied to the destination MemoryManager. This uses
+ /// MemoryManager::CopyBuffer under the hood.
+ Result> CopyTo(
+ const std::shared_ptr& to) const;
+
+ /// \brief View or Copy the entire RecordBatch to destination MemoryManager
+ ///
+ /// This uses Array::ViewOrCopyTo on each column of the record batch to create
+ /// a new record batch where all underlying buffers for the columns have
+ /// been zero-copy viewed on the destination MemoryManager, falling back
+ /// to performing a copy if it can't be viewed as a zero-copy buffer. This uses
+ /// Buffer::ViewOrCopy under the hood.
+ Result> ViewOrCopyTo(
+ const std::shared_ptr& to) const;
+
/// \brief Slice each of the arrays in the record batch
/// \param[in] offset the starting offset to slice, through end of batch
/// \return new record batch
From f9b7ac2e922bceed8bab09b1e28d7261cbe8b41d Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon
Date: Thu, 1 Feb 2024 23:08:21 +0530
Subject: [PATCH 22/74] GH-37841: [Java] Dictionary decoding not using the
compression factory from the ArrowReader (#38371)
### Rationale for this change
This PR addresses https://github.com/apache/arrow/issues/37841.
### What changes are included in this PR?
Adding compression-based write and read for Dictionary data.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No
* Closes: #37841
Lead-authored-by: Vibhatha Lakmal Abeykoon
Co-authored-by: vibhatha
Signed-off-by: David Li
---
.../TestArrowReaderWriterWithCompression.java | 206 ++++++++++++++++--
.../apache/arrow/vector/ipc/ArrowReader.java | 2 +-
.../apache/arrow/vector/ipc/ArrowWriter.java | 23 +-
3 files changed, 201 insertions(+), 30 deletions(-)
diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java
index 6104cb1a132e4..af28333746290 100644
--- a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java
+++ b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java
@@ -18,7 +18,9 @@
package org.apache.arrow.compression;
import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.nio.channels.Channels;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -27,63 +29,223 @@
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.GenerateSampleData;
+import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.compression.CompressionUtil;
import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
import org.apache.arrow.vector.ipc.ArrowFileReader;
import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
import org.apache.arrow.vector.ipc.message.IpcOption;
import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.junit.After;
import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
public class TestArrowReaderWriterWithCompression {
- @Test
- public void testArrowFileZstdRoundTrip() throws Exception {
- // Prepare sample data
- final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ private BufferAllocator allocator;
+ private ByteArrayOutputStream out;
+ private VectorSchemaRoot root;
+
+ @BeforeEach
+ public void setup() {
+ if (allocator == null) {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+ out = new ByteArrayOutputStream();
+ root = null;
+ }
+
+ @After
+ public void tearDown() {
+ if (root != null) {
+ root.close();
+ }
+ if (allocator != null) {
+ allocator.close();
+ }
+ if (out != null) {
+ out.reset();
+ }
+
+ }
+
+ private void createAndWriteArrowFile(DictionaryProvider provider,
+ CompressionUtil.CodecType codecType) throws IOException {
List fields = new ArrayList<>();
fields.add(new Field("col", FieldType.notNullable(new ArrowType.Utf8()), new ArrayList<>()));
- VectorSchemaRoot root = VectorSchemaRoot.create(new Schema(fields), allocator);
+ root = VectorSchemaRoot.create(new Schema(fields), allocator);
+
final int rowCount = 10;
GenerateSampleData.generateTestData(root.getVector(0), rowCount);
root.setRowCount(rowCount);
- // Write an in-memory compressed arrow file
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- try (final ArrowFileWriter writer =
- new ArrowFileWriter(root, null, Channels.newChannel(out), new HashMap<>(),
- IpcOption.DEFAULT, CommonsCompressionFactory.INSTANCE, CompressionUtil.CodecType.ZSTD, Optional.of(7))) {
+ try (final ArrowFileWriter writer = new ArrowFileWriter(root, provider, Channels.newChannel(out),
+ new HashMap<>(), IpcOption.DEFAULT, CommonsCompressionFactory.INSTANCE, codecType, Optional.of(7))) {
writer.start();
writer.writeBatch();
writer.end();
}
+ }
+
+ private void createAndWriteArrowStream(DictionaryProvider provider,
+ CompressionUtil.CodecType codecType) throws IOException {
+ List fields = new ArrayList<>();
+ fields.add(new Field("col", FieldType.notNullable(new ArrowType.Utf8()), new ArrayList<>()));
+ root = VectorSchemaRoot.create(new Schema(fields), allocator);
+
+ final int rowCount = 10;
+ GenerateSampleData.generateTestData(root.getVector(0), rowCount);
+ root.setRowCount(rowCount);
+
+ try (final ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, Channels.newChannel(out),
+ IpcOption.DEFAULT, CommonsCompressionFactory.INSTANCE, codecType, Optional.of(7))) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+ }
- // Read the in-memory compressed arrow file with CommonsCompressionFactory provided
+ private Dictionary createDictionary(VarCharVector dictionaryVector) {
+ setVector(dictionaryVector,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8));
+
+ return new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, /*indexType=*/null));
+ }
+
+ @Test
+ public void testArrowFileZstdRoundTrip() throws Exception {
+ createAndWriteArrowFile(null, CompressionUtil.CodecType.ZSTD);
+ // with compression
+ try (ArrowFileReader reader =
+ new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
+ CommonsCompressionFactory.INSTANCE)) {
+ Assertions.assertEquals(1, reader.getRecordBlocks().size());
+ Assertions.assertTrue(reader.loadNextBatch());
+ Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot()));
+ Assertions.assertFalse(reader.loadNextBatch());
+ }
+ // without compression
try (ArrowFileReader reader =
- new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()),
- allocator, CommonsCompressionFactory.INSTANCE)) {
- Assert.assertEquals(1, reader.getRecordBlocks().size());
+ new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
+ NoCompressionCodec.Factory.INSTANCE)) {
+ Assertions.assertEquals(1, reader.getRecordBlocks().size());
+ Exception exception = Assert.assertThrows(IllegalArgumentException.class,
+ reader::loadNextBatch);
+ Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
+ exception.getMessage());
+ }
+ }
+
+ @Test
+ public void testArrowStreamZstdRoundTrip() throws Exception {
+ createAndWriteArrowStream(null, CompressionUtil.CodecType.ZSTD);
+ // with compression
+ try (ArrowStreamReader reader =
+ new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
+ CommonsCompressionFactory.INSTANCE)) {
Assert.assertTrue(reader.loadNextBatch());
Assert.assertTrue(root.equals(reader.getVectorSchemaRoot()));
Assert.assertFalse(reader.loadNextBatch());
}
+ // without compression
+ try (ArrowStreamReader reader =
+ new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
+ NoCompressionCodec.Factory.INSTANCE)) {
+ Exception exception = Assert.assertThrows(IllegalArgumentException.class,
+ reader::loadNextBatch);
+ Assert.assertEquals(
+ "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
+ exception.getMessage()
+ );
+ }
+ }
- // Read the in-memory compressed arrow file without CompressionFactory provided
+ @Test
+ public void testArrowFileZstdRoundTripWithDictionary() throws Exception {
+ VarCharVector dictionaryVector = (VarCharVector)
+ FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector("f1_file", allocator, null);
+ Dictionary dictionary = createDictionary(dictionaryVector);
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary);
+
+ createAndWriteArrowFile(provider, CompressionUtil.CodecType.ZSTD);
+
+ // with compression
+ try (ArrowFileReader reader =
+ new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
+ CommonsCompressionFactory.INSTANCE)) {
+ Assertions.assertEquals(1, reader.getRecordBlocks().size());
+ Assertions.assertTrue(reader.loadNextBatch());
+ Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot()));
+ Assertions.assertFalse(reader.loadNextBatch());
+ }
+ // without compression
try (ArrowFileReader reader =
- new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()),
- allocator, NoCompressionCodec.Factory.INSTANCE)) {
- Assert.assertEquals(1, reader.getRecordBlocks().size());
+ new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
+ NoCompressionCodec.Factory.INSTANCE)) {
+ Assertions.assertEquals(1, reader.getRecordBlocks().size());
+ Exception exception = Assert.assertThrows(IllegalArgumentException.class,
+ reader::loadNextBatch);
+ Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
+ exception.getMessage());
+ }
+ dictionaryVector.close();
+ }
+
+ @Test
+ public void testArrowStreamZstdRoundTripWithDictionary() throws Exception {
+ VarCharVector dictionaryVector = (VarCharVector)
+ FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector("f1_stream", allocator, null);
+ Dictionary dictionary = createDictionary(dictionaryVector);
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary);
+
+ createAndWriteArrowStream(provider, CompressionUtil.CodecType.ZSTD);
+
+ // with compression
+ try (ArrowStreamReader reader =
+ new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
+ CommonsCompressionFactory.INSTANCE)) {
+ Assertions.assertTrue(reader.loadNextBatch());
+ Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot()));
+ Assertions.assertFalse(reader.loadNextBatch());
+ }
+ // without compression
+ try (ArrowStreamReader reader =
+ new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
+ NoCompressionCodec.Factory.INSTANCE)) {
+ Exception exception = Assert.assertThrows(IllegalArgumentException.class,
+ reader::loadNextBatch);
+ Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
+ exception.getMessage());
+ }
+ dictionaryVector.close();
+ }
- Exception exception = Assert.assertThrows(IllegalArgumentException.class, () -> reader.loadNextBatch());
- String expectedMessage = "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD";
- Assert.assertEquals(expectedMessage, exception.getMessage());
+ public static void setVector(VarCharVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
}
+ vector.setValueCount(length);
}
}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java
index 04c57d7e82fef..01f4e925c69b3 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java
@@ -251,7 +251,7 @@ private void load(ArrowDictionaryBatch dictionaryBatch, FieldVector vector) {
VectorSchemaRoot root = new VectorSchemaRoot(
Collections.singletonList(vector.getField()),
Collections.singletonList(vector), 0);
- VectorLoader loader = new VectorLoader(root);
+ VectorLoader loader = new VectorLoader(root, this.compressionFactory);
try {
loader.load(dictionaryBatch.getDictionary());
} finally {
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java
index a33c55de53f23..1cc201ae56f4b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java
@@ -61,9 +61,14 @@ public abstract class ArrowWriter implements AutoCloseable {
private final DictionaryProvider dictionaryProvider;
private final Set dictionaryIdsUsed = new HashSet<>();
+ private final CompressionCodec.Factory compressionFactory;
+ private final CompressionUtil.CodecType codecType;
+ private final Optional compressionLevel;
private boolean started = false;
private boolean ended = false;
+ private final CompressionCodec codec;
+
protected IpcOption option;
protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) {
@@ -89,16 +94,19 @@ protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, Writab
protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out, IpcOption option,
CompressionCodec.Factory compressionFactory, CompressionUtil.CodecType codecType,
Optional compressionLevel) {
- this.unloader = new VectorUnloader(
- root, /*includeNullCount*/ true,
- compressionLevel.isPresent() ?
- compressionFactory.createCodec(codecType, compressionLevel.get()) :
- compressionFactory.createCodec(codecType),
- /*alignBuffers*/ true);
this.out = new WriteChannel(out);
this.option = option;
this.dictionaryProvider = provider;
+ this.compressionFactory = compressionFactory;
+ this.codecType = codecType;
+ this.compressionLevel = compressionLevel;
+ this.codec = this.compressionLevel.isPresent() ?
+ this.compressionFactory.createCodec(this.codecType, this.compressionLevel.get()) :
+ this.compressionFactory.createCodec(this.codecType);
+ this.unloader = new VectorUnloader(root, /*includeNullCount*/ true, codec,
+ /*alignBuffers*/ true);
+
List fields = new ArrayList<>(root.getSchema().getFields().size());
MetadataV4UnionChecker.checkForUnion(root.getSchema().getFields().iterator(), option.metadataVersion);
@@ -133,7 +141,8 @@ protected void writeDictionaryBatch(Dictionary dictionary) throws IOException {
Collections.singletonList(vector.getField()),
Collections.singletonList(vector),
count);
- VectorUnloader unloader = new VectorUnloader(dictRoot);
+ VectorUnloader unloader = new VectorUnloader(dictRoot, /*includeNullCount*/ true, this.codec,
+ /*alignBuffers*/ true);
ArrowRecordBatch batch = unloader.getRecordBatch();
ArrowDictionaryBatch dictionaryBatch = new ArrowDictionaryBatch(id, batch, false);
try {
From a57363867a6d88d0a7f17767571ab57dbb70cbfd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 1 Feb 2024 13:33:45 -0500
Subject: [PATCH 23/74] MINOR: [JS] Bump esbuild from 0.19.2 to 0.20.0 in /js
(#39891)
Bumps [esbuild](https://github.com/evanw/esbuild) from 0.19.2 to 0.20.0.
Release notes
Sourced from esbuild's
releases.
v0.20.0
This release deliberately contains backwards-incompatible
changes. To avoid automatically picking up releases like this,
you should either be pinning the exact version of esbuild
in your package.json
file (recommended) or be using a
version range syntax that only accepts patch upgrades such as
^0.19.0
or ~0.19.0
. See npm's documentation
about semver for
more information.
This time there is only one breaking change, and it only matters for
people using Deno. Deno tests that use esbuild will now fail unless you
make the change described below.
-
Work around API deprecations in Deno 1.40.x (#3609,
#3611)
Deno 1.40.0 was just
released and introduced run-time warnings about certain APIs that
esbuild uses. With this release, esbuild will work around these run-time
warnings by using newer APIs if they are present and falling back to the
original APIs otherwise. This should avoid the warnings without breaking
compatibility with older versions of Deno.
Unfortunately, doing this introduces a breaking change. The newer
child process APIs lack a way to synchronously terminate esbuild's child
process, so calling esbuild.stop()
from within a Deno test
is no longer sufficient to prevent Deno from failing a test that uses
esbuild's API (Deno fails tests that create a child process without
killing it before the test ends). To work around this, esbuild's
stop()
function has been changed to return a promise, and
you now have to change esbuild.stop()
to await
esbuild.stop()
in all of your Deno tests.
-
Reorder implicit file extensions within node_modules
(#3341,
#3608)
In version
0.18.0, esbuild changed the behavior of implicit file extensions
within node_modules
directories (i.e. in published
packages) to prefer .js
over .ts
even when the
--resolve-extensions=
order prefers .ts
over
.js
(which it does by default). However, doing that also
accidentally made esbuild prefer .css
over
.ts
, which caused problems for people that published
packages containing both TypeScript and CSS in files with the same
name.
With this release, esbuild will reorder TypeScript file extensions
immediately after the last JavaScript file extensions in the implicit
file extension order instead of putting them at the end of the order.
Specifically the default implicit file extension order is
.tsx,.ts,.jsx,.js,.css,.json
which used to become
.jsx,.js,.css,.json,.tsx,.ts
in node_modules
directories. With this release it will now become
.jsx,.js,.tsx,.ts,.css,.json
instead.
Why even rewrite the implicit file extension order at all? One reason
is because the .js
file is more likely to behave correctly
than the .ts
file. The behavior of the .ts
file may depend on tsconfig.json
and the
tsconfig.json
file may not even be published, or may use
extends
to refer to a base tsconfig.json
file
that wasn't published. People can get into this situation when they
forget to add all .ts
files to their
.npmignore
file before publishing to npm. Picking
.js
over .ts
helps make it more likely that
resulting bundle will behave correctly.
v0.19.12
-
The "preserve" JSX mode now preserves JSX text verbatim (#3605)
The JSX specification
deliberately doesn't specify how JSX text is supposed to be interpreted
and there is no canonical way to interpret JSX text. Two most popular
interpretations are Babel and TypeScript. Yes they
are different (esbuild deliberately
follows TypeScript by the way).
Previously esbuild normalized text to the TypeScript interpretation
when the "preserve" JSX mode is active. However,
"preserve" should arguably reproduce the original JSX text
verbatim so that whatever JSX transform runs after esbuild is free to
interpret it however it wants. So with this release, esbuild will now
pass JSX text through unmodified:
// Original code
let el =
<a href={'/'} title=''"'> some text
{foo}
more text </a>
// Old output (with --loader=jsx --jsx=preserve)
let el = <a href="/" title={'"
}>
{" some text"}
{foo}
{"more text "}
</a>;
// New output (with --loader=jsx --jsx=preserve)
let el = <a href={"/"} title=''"'>
some text
{foo}
more text </a>;
Allow JSX elements as JSX attribute values
JSX has an obscure feature where you can use JSX elements in
attribute position without surrounding them with {...}
. It
looks like this:
... (truncated)
Changelog
Sourced from esbuild's
changelog.
0.20.0
This release deliberately contains backwards-incompatible
changes. To avoid automatically picking up releases like this,
you should either be pinning the exact version of esbuild
in your package.json
file (recommended) or be using a
version range syntax that only accepts patch upgrades such as
^0.19.0
or ~0.19.0
. See npm's documentation
about semver for
more information.
This time there is only one breaking change, and it only matters for
people using Deno. Deno tests that use esbuild will now fail unless you
make the change described below.
-
Work around API deprecations in Deno 1.40.x (#3609,
#3611)
Deno 1.40.0 was just
released and introduced run-time warnings about certain APIs that
esbuild uses. With this release, esbuild will work around these run-time
warnings by using newer APIs if they are present and falling back to the
original APIs otherwise. This should avoid the warnings without breaking
compatibility with older versions of Deno.
Unfortunately, doing this introduces a breaking change. The newer
child process APIs lack a way to synchronously terminate esbuild's child
process, so calling esbuild.stop()
from within a Deno test
is no longer sufficient to prevent Deno from failing a test that uses
esbuild's API (Deno fails tests that create a child process without
killing it before the test ends). To work around this, esbuild's
stop()
function has been changed to return a promise, and
you now have to change esbuild.stop()
to await
esbuild.stop()
in all of your Deno tests.
-
Reorder implicit file extensions within node_modules
(#3341,
#3608)
In version
0.18.0, esbuild changed the behavior of implicit file extensions
within node_modules
directories (i.e. in published
packages) to prefer .js
over .ts
even when the
--resolve-extensions=
order prefers .ts
over
.js
(which it does by default). However, doing that also
accidentally made esbuild prefer .css
over
.ts
, which caused problems for people that published
packages containing both TypeScript and CSS in files with the same
name.
With this release, esbuild will reorder TypeScript file extensions
immediately after the last JavaScript file extensions in the implicit
file extension order instead of putting them at the end of the order.
Specifically the default implicit file extension order is
.tsx,.ts,.jsx,.js,.css,.json
which used to become
.jsx,.js,.css,.json,.tsx,.ts
in node_modules
directories. With this release it will now become
.jsx,.js,.tsx,.ts,.css,.json
instead.
Why even rewrite the implicit file extension order at all? One reason
is because the .js
file is more likely to behave correctly
than the .ts
file. The behavior of the .ts
file may depend on tsconfig.json
and the
tsconfig.json
file may not even be published, or may use
extends
to refer to a base tsconfig.json
file
that wasn't published. People can get into this situation when they
forget to add all .ts
files to their
.npmignore
file before publishing to npm. Picking
.js
over .ts
helps make it more likely that
resulting bundle will behave correctly.
0.19.12
-
The "preserve" JSX mode now preserves JSX text verbatim (#3605)
The JSX specification
deliberately doesn't specify how JSX text is supposed to be interpreted
and there is no canonical way to interpret JSX text. Two most popular
interpretations are Babel and TypeScript. Yes they
are different (esbuild deliberately
follows TypeScript by the way).
Previously esbuild normalized text to the TypeScript interpretation
when the "preserve" JSX mode is active. However,
"preserve" should arguably reproduce the original JSX text
verbatim so that whatever JSX transform runs after esbuild is free to
interpret it however it wants. So with this release, esbuild will now
pass JSX text through unmodified:
// Original code
let el =
<a href={'/'} title=''"'> some text
{foo}
more text </a>
// Old output (with --loader=jsx --jsx=preserve)
let el = <a href="/" title={'"
}>
{" some text"}
{foo}
{"more text "}
</a>;
// New output (with --loader=jsx --jsx=preserve)
let el = <a href={"/"} title=''"'>
some text
{foo}
more text </a>;
Allow JSX elements as JSX attribute values
... (truncated)
Commits
[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=esbuild&package-manager=npm_and_yarn&previous-version=0.19.2&new-version=0.20.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
js/package.json | 2 +-
js/yarn.lock | 234 +++++++++++++++++++++++++-----------------------
2 files changed, 121 insertions(+), 115 deletions(-)
diff --git a/js/package.json b/js/package.json
index 57f9267afa3a8..f96764d82245e 100644
--- a/js/package.json
+++ b/js/package.json
@@ -79,7 +79,7 @@
"cross-env": "7.0.3",
"del": "7.1.0",
"del-cli": "5.1.0",
- "esbuild": "0.19.2",
+ "esbuild": "0.20.0",
"esbuild-plugin-alias": "0.2.1",
"eslint": "8.52.0",
"eslint-plugin-jest": "27.4.2",
diff --git a/js/yarn.lock b/js/yarn.lock
index 10d2a256e1cac..e7dead09bf8bb 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -416,225 +416,230 @@
resolved "https://registry.npmjs.org/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz#1d572bfbbe14b7704e0ba0f39b74815b84870d70"
integrity sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw==
+"@esbuild/aix-ppc64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.20.0.tgz#509621cca4e67caf0d18561a0c56f8b70237472f"
+ integrity sha512-fGFDEctNh0CcSwsiRPxiaqX0P5rq+AqE0SRhYGZ4PX46Lg1FNR6oCxJghf8YgY0WQEgQuh3lErUFE4KxLeRmmw==
+
"@esbuild/android-arm64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.17.19.tgz#bafb75234a5d3d1b690e7c2956a599345e84a2fd"
integrity sha512-KBMWvEZooR7+kzY0BtbTQn0OAYY7CsiydT63pVEaPtVYF0hXbUaOyZog37DKxK7NF3XacBJOpYT4adIJh+avxA==
-"@esbuild/android-arm64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.2.tgz#bc35990f412a749e948b792825eef7df0ce0e073"
- integrity sha512-lsB65vAbe90I/Qe10OjkmrdxSX4UJDjosDgb8sZUKcg3oefEuW2OT2Vozz8ef7wrJbMcmhvCC+hciF8jY/uAkw==
+"@esbuild/android-arm64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.20.0.tgz#109a6fdc4a2783fc26193d2687827045d8fef5ab"
+ integrity sha512-aVpnM4lURNkp0D3qPoAzSG92VXStYmoVPOgXveAUoQBWRSuQzt51yvSju29J6AHPmwY1BjH49uR29oyfH1ra8Q==
"@esbuild/android-arm@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.17.19.tgz#5898f7832c2298bc7d0ab53701c57beb74d78b4d"
integrity sha512-rIKddzqhmav7MSmoFCmDIb6e2W57geRsM94gV2l38fzhXMwq7hZoClug9USI2pFRGL06f4IOPHHpFNOkWieR8A==
-"@esbuild/android-arm@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.2.tgz#edd1c8f23ba353c197f5b0337123c58ff2a56999"
- integrity sha512-tM8yLeYVe7pRyAu9VMi/Q7aunpLwD139EY1S99xbQkT4/q2qa6eA4ige/WJQYdJ8GBL1K33pPFhPfPdJ/WzT8Q==
+"@esbuild/android-arm@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.20.0.tgz#1397a2c54c476c4799f9b9073550ede496c94ba5"
+ integrity sha512-3bMAfInvByLHfJwYPJRlpTeaQA75n8C/QKpEaiS4HrFWFiJlNI0vzq/zCjBrhAYcPyVPG7Eo9dMrcQXuqmNk5g==
"@esbuild/android-x64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.17.19.tgz#658368ef92067866d95fb268719f98f363d13ae1"
integrity sha512-uUTTc4xGNDT7YSArp/zbtmbhO0uEEK9/ETW29Wk1thYUJBz3IVnvgEiEwEa9IeLyvnpKrWK64Utw2bgUmDveww==
-"@esbuild/android-x64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.2.tgz#2dcdd6e6f1f2d82ea1b746abd8da5b284960f35a"
- integrity sha512-qK/TpmHt2M/Hg82WXHRc/W/2SGo/l1thtDHZWqFq7oi24AjZ4O/CpPSu6ZuYKFkEgmZlFoa7CooAyYmuvnaG8w==
+"@esbuild/android-x64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.20.0.tgz#2b615abefb50dc0a70ac313971102f4ce2fdb3ca"
+ integrity sha512-uK7wAnlRvjkCPzh8jJ+QejFyrP8ObKuR5cBIsQZ+qbMunwR8sbd8krmMbxTLSrDhiPZaJYKQAU5Y3iMDcZPhyQ==
"@esbuild/darwin-arm64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.17.19.tgz#584c34c5991b95d4d48d333300b1a4e2ff7be276"
integrity sha512-80wEoCfF/hFKM6WE1FyBHc9SfUblloAWx6FJkFWTWiCoht9Mc0ARGEM47e67W9rI09YoUxJL68WHfDRYEAvOhg==
-"@esbuild/darwin-arm64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.2.tgz#55b36bc06d76f5c243987c1f93a11a80d8fc3b26"
- integrity sha512-Ora8JokrvrzEPEpZO18ZYXkH4asCdc1DLdcVy8TGf5eWtPO1Ie4WroEJzwI52ZGtpODy3+m0a2yEX9l+KUn0tA==
+"@esbuild/darwin-arm64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.20.0.tgz#5c122ed799eb0c35b9d571097f77254964c276a2"
+ integrity sha512-AjEcivGAlPs3UAcJedMa9qYg9eSfU6FnGHJjT8s346HSKkrcWlYezGE8VaO2xKfvvlZkgAhyvl06OJOxiMgOYQ==
"@esbuild/darwin-x64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.17.19.tgz#7751d236dfe6ce136cce343dce69f52d76b7f6cb"
integrity sha512-IJM4JJsLhRYr9xdtLytPLSH9k/oxR3boaUIYiHkAawtwNOXKE8KoU8tMvryogdcT8AU+Bflmh81Xn6Q0vTZbQw==
-"@esbuild/darwin-x64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.2.tgz#982524af33a6424a3b5cb44bbd52559623ad719c"
- integrity sha512-tP+B5UuIbbFMj2hQaUr6EALlHOIOmlLM2FK7jeFBobPy2ERdohI4Ka6ZFjZ1ZYsrHE/hZimGuU90jusRE0pwDw==
+"@esbuild/darwin-x64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.20.0.tgz#9561d277002ba8caf1524f209de2b22e93d170c1"
+ integrity sha512-bsgTPoyYDnPv8ER0HqnJggXK6RyFy4PH4rtsId0V7Efa90u2+EifxytE9pZnsDgExgkARy24WUQGv9irVbTvIw==
"@esbuild/freebsd-arm64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.17.19.tgz#cacd171665dd1d500f45c167d50c6b7e539d5fd2"
integrity sha512-pBwbc7DufluUeGdjSU5Si+P3SoMF5DQ/F/UmTSb8HXO80ZEAJmrykPyzo1IfNbAoaqw48YRpv8shwd1NoI0jcQ==
-"@esbuild/freebsd-arm64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.2.tgz#8e478a0856645265fe79eac4b31b52193011ee06"
- integrity sha512-YbPY2kc0acfzL1VPVK6EnAlig4f+l8xmq36OZkU0jzBVHcOTyQDhnKQaLzZudNJQyymd9OqQezeaBgkTGdTGeQ==
+"@esbuild/freebsd-arm64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.20.0.tgz#84178986a3138e8500d17cc380044868176dd821"
+ integrity sha512-kQ7jYdlKS335mpGbMW5tEe3IrQFIok9r84EM3PXB8qBFJPSc6dpWfrtsC/y1pyrz82xfUIn5ZrnSHQQsd6jebQ==
"@esbuild/freebsd-x64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.17.19.tgz#0769456eee2a08b8d925d7c00b79e861cb3162e4"
integrity sha512-4lu+n8Wk0XlajEhbEffdy2xy53dpR06SlzvhGByyg36qJw6Kpfk7cp45DR/62aPH9mtJRmIyrXAS5UWBrJT6TQ==
-"@esbuild/freebsd-x64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.2.tgz#01b96604f2540db023c73809bb8ae6cd1692d6f3"
- integrity sha512-nSO5uZT2clM6hosjWHAsS15hLrwCvIWx+b2e3lZ3MwbYSaXwvfO528OF+dLjas1g3bZonciivI8qKR/Hm7IWGw==
+"@esbuild/freebsd-x64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.20.0.tgz#3f9ce53344af2f08d178551cd475629147324a83"
+ integrity sha512-uG8B0WSepMRsBNVXAQcHf9+Ko/Tr+XqmK7Ptel9HVmnykupXdS4J7ovSQUIi0tQGIndhbqWLaIL/qO/cWhXKyQ==
"@esbuild/linux-arm64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.17.19.tgz#38e162ecb723862c6be1c27d6389f48960b68edb"
integrity sha512-ct1Tg3WGwd3P+oZYqic+YZF4snNl2bsnMKRkb3ozHmnM0dGWuxcPTTntAF6bOP0Sp4x0PjSF+4uHQ1xvxfRKqg==
-"@esbuild/linux-arm64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.2.tgz#7e5d2c7864c5c83ec789b59c77cd9c20d2594916"
- integrity sha512-ig2P7GeG//zWlU0AggA3pV1h5gdix0MA3wgB+NsnBXViwiGgY77fuN9Wr5uoCrs2YzaYfogXgsWZbm+HGr09xg==
+"@esbuild/linux-arm64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.20.0.tgz#24efa685515689df4ecbc13031fa0a9dda910a11"
+ integrity sha512-uTtyYAP5veqi2z9b6Gr0NUoNv9F/rOzI8tOD5jKcCvRUn7T60Bb+42NDBCWNhMjkQzI0qqwXkQGo1SY41G52nw==
"@esbuild/linux-arm@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.17.19.tgz#1a2cd399c50040184a805174a6d89097d9d1559a"
integrity sha512-cdmT3KxjlOQ/gZ2cjfrQOtmhG4HJs6hhvm3mWSRDPtZ/lP5oe8FWceS10JaSJC13GBd4eH/haHnqf7hhGNLerA==
-"@esbuild/linux-arm@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.2.tgz#c32ae97bc0246664a1cfbdb4a98e7b006d7db8ae"
- integrity sha512-Odalh8hICg7SOD7XCj0YLpYCEc+6mkoq63UnExDCiRA2wXEmGlK5JVrW50vZR9Qz4qkvqnHcpH+OFEggO3PgTg==
+"@esbuild/linux-arm@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.20.0.tgz#6b586a488e02e9b073a75a957f2952b3b6e87b4c"
+ integrity sha512-2ezuhdiZw8vuHf1HKSf4TIk80naTbP9At7sOqZmdVwvvMyuoDiZB49YZKLsLOfKIr77+I40dWpHVeY5JHpIEIg==
"@esbuild/linux-ia32@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.17.19.tgz#e28c25266b036ce1cabca3c30155222841dc035a"
integrity sha512-w4IRhSy1VbsNxHRQpeGCHEmibqdTUx61Vc38APcsRbuVgK0OPEnQ0YD39Brymn96mOx48Y2laBQGqgZ0j9w6SQ==
-"@esbuild/linux-ia32@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.2.tgz#3fc4f0fa026057fe885e4a180b3956e704f1ceaa"
- integrity sha512-mLfp0ziRPOLSTek0Gd9T5B8AtzKAkoZE70fneiiyPlSnUKKI4lp+mGEnQXcQEHLJAcIYDPSyBvsUbKUG2ri/XQ==
+"@esbuild/linux-ia32@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.20.0.tgz#84ce7864f762708dcebc1b123898a397dea13624"
+ integrity sha512-c88wwtfs8tTffPaoJ+SQn3y+lKtgTzyjkD8NgsyCtCmtoIC8RDL7PrJU05an/e9VuAke6eJqGkoMhJK1RY6z4w==
"@esbuild/linux-loong64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.17.19.tgz#0f887b8bb3f90658d1a0117283e55dbd4c9dcf72"
integrity sha512-2iAngUbBPMq439a+z//gE+9WBldoMp1s5GWsUSgqHLzLJ9WoZLZhpwWuym0u0u/4XmZ3gpHmzV84PonE+9IIdQ==
-"@esbuild/linux-loong64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.2.tgz#633bcaea443f3505fb0ed109ab840c99ad3451a4"
- integrity sha512-hn28+JNDTxxCpnYjdDYVMNTR3SKavyLlCHHkufHV91fkewpIyQchS1d8wSbmXhs1fiYDpNww8KTFlJ1dHsxeSw==
+"@esbuild/linux-loong64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.20.0.tgz#1922f571f4cae1958e3ad29439c563f7d4fd9037"
+ integrity sha512-lR2rr/128/6svngnVta6JN4gxSXle/yZEZL3o4XZ6esOqhyR4wsKyfu6qXAL04S4S5CgGfG+GYZnjFd4YiG3Aw==
"@esbuild/linux-mips64el@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.17.19.tgz#f5d2a0b8047ea9a5d9f592a178ea054053a70289"
integrity sha512-LKJltc4LVdMKHsrFe4MGNPp0hqDFA1Wpt3jE1gEyM3nKUvOiO//9PheZZHfYRfYl6AwdTH4aTcXSqBerX0ml4A==
-"@esbuild/linux-mips64el@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.2.tgz#e0bff2898c46f52be7d4dbbcca8b887890805823"
- integrity sha512-KbXaC0Sejt7vD2fEgPoIKb6nxkfYW9OmFUK9XQE4//PvGIxNIfPk1NmlHmMg6f25x57rpmEFrn1OotASYIAaTg==
+"@esbuild/linux-mips64el@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.20.0.tgz#7ca1bd9df3f874d18dbf46af009aebdb881188fe"
+ integrity sha512-9Sycc+1uUsDnJCelDf6ZNqgZQoK1mJvFtqf2MUz4ujTxGhvCWw+4chYfDLPepMEvVL9PDwn6HrXad5yOrNzIsQ==
"@esbuild/linux-ppc64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.17.19.tgz#876590e3acbd9fa7f57a2c7d86f83717dbbac8c7"
integrity sha512-/c/DGybs95WXNS8y3Ti/ytqETiW7EU44MEKuCAcpPto3YjQbyK3IQVKfF6nbghD7EcLUGl0NbiL5Rt5DMhn5tg==
-"@esbuild/linux-ppc64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.2.tgz#d75798da391f54a9674f8c143b9a52d1dbfbfdde"
- integrity sha512-dJ0kE8KTqbiHtA3Fc/zn7lCd7pqVr4JcT0JqOnbj4LLzYnp+7h8Qi4yjfq42ZlHfhOCM42rBh0EwHYLL6LEzcw==
+"@esbuild/linux-ppc64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.20.0.tgz#8f95baf05f9486343bceeb683703875d698708a4"
+ integrity sha512-CoWSaaAXOZd+CjbUTdXIJE/t7Oz+4g90A3VBCHLbfuc5yUQU/nFDLOzQsN0cdxgXd97lYW/psIIBdjzQIwTBGw==
"@esbuild/linux-riscv64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.17.19.tgz#7f49373df463cd9f41dc34f9b2262d771688bf09"
integrity sha512-FC3nUAWhvFoutlhAkgHf8f5HwFWUL6bYdvLc/TTuxKlvLi3+pPzdZiFKSWz/PF30TB1K19SuCxDTI5KcqASJqA==
-"@esbuild/linux-riscv64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.2.tgz#012409bd489ed1bb9b775541d4a46c5ded8e6dd8"
- integrity sha512-7Z/jKNFufZ/bbu4INqqCN6DDlrmOTmdw6D0gH+6Y7auok2r02Ur661qPuXidPOJ+FSgbEeQnnAGgsVynfLuOEw==
+"@esbuild/linux-riscv64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.20.0.tgz#ca63b921d5fe315e28610deb0c195e79b1a262ca"
+ integrity sha512-mlb1hg/eYRJUpv8h/x+4ShgoNLL8wgZ64SUr26KwglTYnwAWjkhR2GpoKftDbPOCnodA9t4Y/b68H4J9XmmPzA==
"@esbuild/linux-s390x@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.17.19.tgz#e2afd1afcaf63afe2c7d9ceacd28ec57c77f8829"
integrity sha512-IbFsFbxMWLuKEbH+7sTkKzL6NJmG2vRyy6K7JJo55w+8xDk7RElYn6xvXtDW8HCfoKBFK69f3pgBJSUSQPr+4Q==
-"@esbuild/linux-s390x@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.2.tgz#ece3ed75c5a150de8a5c110f02e97d315761626b"
- integrity sha512-U+RinR6aXXABFCcAY4gSlv4CL1oOVvSSCdseQmGO66H+XyuQGZIUdhG56SZaDJQcLmrSfRmx5XZOWyCJPRqS7g==
+"@esbuild/linux-s390x@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.20.0.tgz#cb3d069f47dc202f785c997175f2307531371ef8"
+ integrity sha512-fgf9ubb53xSnOBqyvWEY6ukBNRl1mVX1srPNu06B6mNsNK20JfH6xV6jECzrQ69/VMiTLvHMicQR/PgTOgqJUQ==
"@esbuild/linux-x64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.17.19.tgz#8a0e9738b1635f0c53389e515ae83826dec22aa4"
integrity sha512-68ngA9lg2H6zkZcyp22tsVt38mlhWde8l3eJLWkyLrp4HwMUr3c1s/M2t7+kHIhvMjglIBrFpncX1SzMckomGw==
-"@esbuild/linux-x64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.2.tgz#dea187019741602d57aaf189a80abba261fbd2aa"
- integrity sha512-oxzHTEv6VPm3XXNaHPyUTTte+3wGv7qVQtqaZCrgstI16gCuhNOtBXLEBkBREP57YTd68P0VgDgG73jSD8bwXQ==
+"@esbuild/linux-x64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.20.0.tgz#ac617e0dc14e9758d3d7efd70288c14122557dc7"
+ integrity sha512-H9Eu6MGse++204XZcYsse1yFHmRXEWgadk2N58O/xd50P9EvFMLJTQLg+lB4E1cF2xhLZU5luSWtGTb0l9UeSg==
"@esbuild/netbsd-x64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.17.19.tgz#c29fb2453c6b7ddef9a35e2c18b37bda1ae5c462"
integrity sha512-CwFq42rXCR8TYIjIfpXCbRX0rp1jo6cPIUPSaWwzbVI4aOfX96OXY8M6KNmtPcg7QjYeDmN+DD0Wp3LaBOLf4Q==
-"@esbuild/netbsd-x64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.2.tgz#bbfd7cf9ab236a23ee3a41b26f0628c57623d92a"
- integrity sha512-WNa5zZk1XpTTwMDompZmvQLHszDDDN7lYjEHCUmAGB83Bgs20EMs7ICD+oKeT6xt4phV4NDdSi/8OfjPbSbZfQ==
+"@esbuild/netbsd-x64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.20.0.tgz#6cc778567f1513da6e08060e0aeb41f82eb0f53c"
+ integrity sha512-lCT675rTN1v8Fo+RGrE5KjSnfY0x9Og4RN7t7lVrN3vMSjy34/+3na0q7RIfWDAj0e0rCh0OL+P88lu3Rt21MQ==
"@esbuild/openbsd-x64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.17.19.tgz#95e75a391403cb10297280d524d66ce04c920691"
integrity sha512-cnq5brJYrSZ2CF6c35eCmviIN3k3RczmHz8eYaVlNasVqsNY+JKohZU5MKmaOI+KkllCdzOKKdPs762VCPC20g==
-"@esbuild/openbsd-x64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.2.tgz#fa5c4c6ee52a360618f00053652e2902e1d7b4a7"
- integrity sha512-S6kI1aT3S++Dedb7vxIuUOb3oAxqxk2Rh5rOXOTYnzN8JzW1VzBd+IqPiSpgitu45042SYD3HCoEyhLKQcDFDw==
+"@esbuild/openbsd-x64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.20.0.tgz#76848bcf76b4372574fb4d06cd0ed1fb29ec0fbe"
+ integrity sha512-HKoUGXz/TOVXKQ+67NhxyHv+aDSZf44QpWLa3I1lLvAwGq8x1k0T+e2HHSRvxWhfJrFxaaqre1+YyzQ99KixoA==
"@esbuild/sunos-x64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.17.19.tgz#722eaf057b83c2575937d3ffe5aeb16540da7273"
integrity sha512-vCRT7yP3zX+bKWFeP/zdS6SqdWB8OIpaRq/mbXQxTGHnIxspRtigpkUcDMlSCOejlHowLqII7K2JKevwyRP2rg==
-"@esbuild/sunos-x64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.2.tgz#52a2ac8ac6284c02d25df22bb4cfde26fbddd68d"
- integrity sha512-VXSSMsmb+Z8LbsQGcBMiM+fYObDNRm8p7tkUDMPG/g4fhFX5DEFmjxIEa3N8Zr96SjsJ1woAhF0DUnS3MF3ARw==
+"@esbuild/sunos-x64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.20.0.tgz#ea4cd0639bf294ad51bc08ffbb2dac297e9b4706"
+ integrity sha512-GDwAqgHQm1mVoPppGsoq4WJwT3vhnz/2N62CzhvApFD1eJyTroob30FPpOZabN+FgCjhG+AgcZyOPIkR8dfD7g==
"@esbuild/win32-arm64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.17.19.tgz#9aa9dc074399288bdcdd283443e9aeb6b9552b6f"
integrity sha512-yYx+8jwowUstVdorcMdNlzklLYhPxjniHWFKgRqH7IFlUEa0Umu3KuYplf1HUZZ422e3NU9F4LGb+4O0Kdcaag==
-"@esbuild/win32-arm64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.2.tgz#719ed5870855de8537aef8149694a97d03486804"
- integrity sha512-5NayUlSAyb5PQYFAU9x3bHdsqB88RC3aM9lKDAz4X1mo/EchMIT1Q+pSeBXNgkfNmRecLXA0O8xP+x8V+g/LKg==
+"@esbuild/win32-arm64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.20.0.tgz#a5c171e4a7f7e4e8be0e9947a65812c1535a7cf0"
+ integrity sha512-0vYsP8aC4TvMlOQYozoksiaxjlvUcQrac+muDqj1Fxy6jh9l9CZJzj7zmh8JGfiV49cYLTorFLxg7593pGldwQ==
"@esbuild/win32-ia32@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.17.19.tgz#95ad43c62ad62485e210f6299c7b2571e48d2b03"
integrity sha512-eggDKanJszUtCdlVs0RB+h35wNlb5v4TWEkq4vZcmVt5u/HiDZrTXe2bWFQUez3RgNHwx/x4sk5++4NSSicKkw==
-"@esbuild/win32-ia32@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.2.tgz#24832223880b0f581962c8660f8fb8797a1e046a"
- integrity sha512-47gL/ek1v36iN0wL9L4Q2MFdujR0poLZMJwhO2/N3gA89jgHp4MR8DKCmwYtGNksbfJb9JoTtbkoe6sDhg2QTA==
+"@esbuild/win32-ia32@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.20.0.tgz#f8ac5650c412d33ea62d7551e0caf82da52b7f85"
+ integrity sha512-p98u4rIgfh4gdpV00IqknBD5pC84LCub+4a3MO+zjqvU5MVXOc3hqR2UgT2jI2nh3h8s9EQxmOsVI3tyzv1iFg==
"@esbuild/win32-x64@0.17.19":
version "0.17.19"
resolved "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.17.19.tgz#8cfaf2ff603e9aabb910e9c0558c26cf32744061"
integrity sha512-lAhycmKnVOuRYNtRtatQR1LPQf2oYCkRGkSFnseDAKPl8lu5SOsK/e1sXe5a0Pc5kHIHe6P2I/ilntNv2xf3cA==
-"@esbuild/win32-x64@0.19.2":
- version "0.19.2"
- resolved "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.2.tgz#1205014625790c7ff0e471644a878a65d1e34ab0"
- integrity sha512-tcuhV7ncXBqbt/Ybf0IyrMcwVOAPDckMK9rXNHtF17UTK18OKLpg08glminN06pt2WCoALhXdLfSPbVvK/6fxw==
+"@esbuild/win32-x64@0.20.0":
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.20.0.tgz#2efddf82828aac85e64cef62482af61c29561bee"
+ integrity sha512-NgJnesu1RtWihtTtXGFMU5YSE6JyyHPMxCwBZK7a6/8d31GuSo9l0Ss7w1Jw5QnKUawG6UEehs883kcXf5fYwg==
"@eslint-community/eslint-utils@^4.2.0", "@eslint-community/eslint-utils@^4.4.0":
version "4.4.0"
@@ -2888,33 +2893,34 @@ esbuild-plugin-alias@0.2.1:
resolved "https://registry.npmjs.org/esbuild-plugin-alias/-/esbuild-plugin-alias-0.2.1.tgz#45a86cb941e20e7c2bc68a2bea53562172494fcb"
integrity sha512-jyfL/pwPqaFXyKnj8lP8iLk6Z0m099uXR45aSN8Av1XD4vhvQutxxPzgA2bTcAwQpa1zCXDcWOlhFgyP3GKqhQ==
-esbuild@0.19.2:
- version "0.19.2"
- resolved "https://registry.npmjs.org/esbuild/-/esbuild-0.19.2.tgz#b1541828a89dfb6f840d38538767c6130dca2aac"
- integrity sha512-G6hPax8UbFakEj3hWO0Vs52LQ8k3lnBhxZWomUJDxfz3rZTLqF5k/FCzuNdLx2RbpBiQQF9H9onlDDH1lZsnjg==
+esbuild@0.20.0:
+ version "0.20.0"
+ resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.20.0.tgz#a7170b63447286cd2ff1f01579f09970e6965da4"
+ integrity sha512-6iwE3Y2RVYCME1jLpBqq7LQWK3MW6vjV2bZy6gt/WrqkY+WE74Spyc0ThAOYpMtITvnjX09CrC6ym7A/m9mebA==
optionalDependencies:
- "@esbuild/android-arm" "0.19.2"
- "@esbuild/android-arm64" "0.19.2"
- "@esbuild/android-x64" "0.19.2"
- "@esbuild/darwin-arm64" "0.19.2"
- "@esbuild/darwin-x64" "0.19.2"
- "@esbuild/freebsd-arm64" "0.19.2"
- "@esbuild/freebsd-x64" "0.19.2"
- "@esbuild/linux-arm" "0.19.2"
- "@esbuild/linux-arm64" "0.19.2"
- "@esbuild/linux-ia32" "0.19.2"
- "@esbuild/linux-loong64" "0.19.2"
- "@esbuild/linux-mips64el" "0.19.2"
- "@esbuild/linux-ppc64" "0.19.2"
- "@esbuild/linux-riscv64" "0.19.2"
- "@esbuild/linux-s390x" "0.19.2"
- "@esbuild/linux-x64" "0.19.2"
- "@esbuild/netbsd-x64" "0.19.2"
- "@esbuild/openbsd-x64" "0.19.2"
- "@esbuild/sunos-x64" "0.19.2"
- "@esbuild/win32-arm64" "0.19.2"
- "@esbuild/win32-ia32" "0.19.2"
- "@esbuild/win32-x64" "0.19.2"
+ "@esbuild/aix-ppc64" "0.20.0"
+ "@esbuild/android-arm" "0.20.0"
+ "@esbuild/android-arm64" "0.20.0"
+ "@esbuild/android-x64" "0.20.0"
+ "@esbuild/darwin-arm64" "0.20.0"
+ "@esbuild/darwin-x64" "0.20.0"
+ "@esbuild/freebsd-arm64" "0.20.0"
+ "@esbuild/freebsd-x64" "0.20.0"
+ "@esbuild/linux-arm" "0.20.0"
+ "@esbuild/linux-arm64" "0.20.0"
+ "@esbuild/linux-ia32" "0.20.0"
+ "@esbuild/linux-loong64" "0.20.0"
+ "@esbuild/linux-mips64el" "0.20.0"
+ "@esbuild/linux-ppc64" "0.20.0"
+ "@esbuild/linux-riscv64" "0.20.0"
+ "@esbuild/linux-s390x" "0.20.0"
+ "@esbuild/linux-x64" "0.20.0"
+ "@esbuild/netbsd-x64" "0.20.0"
+ "@esbuild/openbsd-x64" "0.20.0"
+ "@esbuild/sunos-x64" "0.20.0"
+ "@esbuild/win32-arm64" "0.20.0"
+ "@esbuild/win32-ia32" "0.20.0"
+ "@esbuild/win32-x64" "0.20.0"
esbuild@^0.17.11:
version "0.17.19"
From a88e9f62f371e87ac34a29305dc87a82d227ff30 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 1 Feb 2024 13:34:14 -0500
Subject: [PATCH 24/74] MINOR: [JS] Bump regenerator-runtime from 0.14.0 to
0.14.1 in /js (#39889)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bumps [regenerator-runtime](https://github.com/facebook/regenerator)
from 0.14.0 to 0.14.1.
Commits
[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=regenerator-runtime&package-manager=npm_and_yarn&previous-version=0.14.0&new-version=0.14.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
js/package.json | 2 +-
js/yarn.lock | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/js/package.json b/js/package.json
index f96764d82245e..bb70fd0a395b0 100644
--- a/js/package.json
+++ b/js/package.json
@@ -102,7 +102,7 @@
"memfs": "4.5.0",
"mkdirp": "3.0.1",
"multistream": "4.1.0",
- "regenerator-runtime": "0.14.0",
+ "regenerator-runtime": "0.14.1",
"rollup": "4.3.0",
"rxjs": "7.8.1",
"ts-jest": "29.1.1",
diff --git a/js/yarn.lock b/js/yarn.lock
index e7dead09bf8bb..7b3180740d3da 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -5967,10 +5967,10 @@ redent@^4.0.0:
indent-string "^5.0.0"
strip-indent "^4.0.0"
-regenerator-runtime@0.14.0:
- version "0.14.0"
- resolved "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.0.tgz#5e19d68eb12d486f797e15a3c6a918f7cec5eb45"
- integrity sha512-srw17NI0TUWHuGa5CFGGmhfNIeja30WMBfbslPNhf6JrqQlLN5gcrvig1oqPxiVaXb0oW0XRKtH6Nngs5lKCIA==
+regenerator-runtime@0.14.1:
+ version "0.14.1"
+ resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f"
+ integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==
regex-not@^1.0.0, regex-not@^1.0.2:
version "1.0.2"
From 796b0cc0ad0509502f5419d379225e6168e2bb06 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Fri, 2 Feb 2024 16:49:59 +0900
Subject: [PATCH 25/74] GH-39872: [Packaging][Ubuntu] Add support for Ubuntu
24.04 Noble Numbat (#39887)
### Rationale for this change
Ubuntu 24.04 isn't released yet, but it seems that the Docker image is already available.
### What changes are included in this PR?
Add jobs for Ubuntu 24.04.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* Closes: #39872
Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
dev/release/binary-task.rb | 7 +-
dev/release/verify-release-candidate.sh | 4 +-
.../apt/ubuntu-noble/Dockerfile | 41 +++++++++
.../apache-arrow/apt/ubuntu-noble-arm64/from | 18 ++++
.../apache-arrow/apt/ubuntu-noble/Dockerfile | 85 +++++++++++++++++++
dev/tasks/linux-packages/package-task.rb | 2 +
dev/tasks/tasks.yml | 3 +-
7 files changed, 156 insertions(+), 4 deletions(-)
create mode 100644 dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-noble/Dockerfile
create mode 100644 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble-arm64/from
create mode 100644 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile
diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb
index df6c0778dc805..0c1b98ab32c95 100644
--- a/dev/release/binary-task.rb
+++ b/dev/release/binary-task.rb
@@ -1089,6 +1089,7 @@ def available_apt_targets
["ubuntu", "focal", "main"],
["ubuntu", "jammy", "main"],
["ubuntu", "mantic", "main"],
+ ["ubuntu", "noble", "main"],
]
end
@@ -2121,8 +2122,10 @@ def apt_test_targets_default
# "ubuntu-focal-arm64",
"ubuntu-jammy",
# "ubuntu-jammy-arm64",
- "ubuntu-lunar",
- # "ubuntu-lunar-arm64",
+ "ubuntu-mantic",
+ # "ubuntu-mantic-arm64",
+ "ubuntu-noble",
+ # "ubuntu-noble-arm64",
]
end
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 04fc7fd563f65..a61b5ba094c8a 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -196,7 +196,9 @@ test_apt() {
"ubuntu:jammy" \
"arm64v8/ubuntu:jammy" \
"ubuntu:mantic" \
- "arm64v8/ubuntu:mantic"; do \
+ "arm64v8/ubuntu:mantic" \
+ "ubuntu:noble" \
+ "arm64v8/ubuntu:noble"; do \
case "${target}" in
arm64v8/*)
if [ "$(arch)" = "aarch64" -o -e /usr/bin/qemu-aarch64-static ]; then
diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-noble/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-noble/Dockerfile
new file mode 100644
index 0000000000000..0e37ee94bb0a3
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-noble/Dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM ubuntu:noble
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ debhelper \
+ devscripts \
+ fakeroot \
+ gnupg \
+ lsb-release && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble-arm64/from b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble-arm64/from
new file mode 100644
index 0000000000000..4414c353871c6
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/ubuntu:noble
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile
new file mode 100644
index 0000000000000..33f2d9a35371b
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=ubuntu:noble
+FROM ${FROM}
+
+RUN \
+ echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN \
+ echo 'APT::Install-Recommends "false";' > \
+ /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+RUN \
+ quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+ apt update ${quiet} && \
+ apt install -y -V ${quiet} \
+ build-essential \
+ clang \
+ clang-tools \
+ cmake \
+ debhelper \
+ devscripts \
+ git \
+ gtk-doc-tools \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ libgrpc++-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libmlir-15-dev \
+ libprotobuf-dev \
+ libprotoc-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ llvm-dev \
+ lsb-release \
+ meson \
+ mlir-15-tools \
+ ninja-build \
+ nlohmann-json3-dev \
+ pkg-config \
+ protobuf-compiler-grpc \
+ python3-dev \
+ python3-pip \
+ python3-setuptools \
+ rapidjson-dev \
+ tzdata \
+ valac \
+ zlib1g-dev && \
+ if apt list | grep -q '^libcuda'; then \
+ apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+ else \
+ :; \
+ fi && \
+ apt clean && \
+ rm -rf /var/lib/apt/lists/*
diff --git a/dev/tasks/linux-packages/package-task.rb b/dev/tasks/linux-packages/package-task.rb
index ecd61054daeb1..51fe0b9a75b0c 100644
--- a/dev/tasks/linux-packages/package-task.rb
+++ b/dev/tasks/linux-packages/package-task.rb
@@ -279,6 +279,8 @@ def apt_targets_default
# "ubuntu-jammy-arm64",
"ubuntu-mantic",
# "ubuntu-mantic-arm64",
+ "ubuntu-noble",
+ # "ubuntu-noble-arm64",
]
end
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 6c59364d51a50..0f8c58391fa66 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -465,7 +465,8 @@ tasks:
"debian-trixie",
"ubuntu-focal",
"ubuntu-jammy",
- "ubuntu-mantic"] %}
+ "ubuntu-mantic",
+ "ubuntu-noble"] %}
{% for architecture in ["amd64", "arm64"] %}
{{ target }}-{{ architecture }}:
ci: github
From 129a5291a26e2baa91d98d1910cb2128854e6b60 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 2 Feb 2024 11:26:57 +0100
Subject: [PATCH 26/74] GH-39788: [Python] Validate max_chunksize in
Table.to_batches (#39796)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
### Rationale for this change
Validate that the keyword is strictly positive, to avoid an infinite loop.
* Closes: #39788
Authored-by: Joris Van den Bossche
Signed-off-by: Raúl Cumplido
---
python/pyarrow/table.pxi | 2 ++
python/pyarrow/tests/test_table.py | 3 +++
2 files changed, 5 insertions(+)
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 3c450d61a7659..abda784fb7c18 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -4172,6 +4172,8 @@ cdef class Table(_Tabular):
reader.reset(new TableBatchReader(deref(self.table)))
if max_chunksize is not None:
+ if not max_chunksize > 0:
+ raise ValueError("'max_chunksize' should be strictly positive")
c_max_chunksize = max_chunksize
reader.get().set_chunksize(c_max_chunksize)
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index ff38c614c251f..d6def54570581 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1089,6 +1089,9 @@ def test_table_to_batches():
table_from_iter = pa.Table.from_batches(iter([batch1, batch2, batch1]))
assert table.equals(table_from_iter)
+ with pytest.raises(ValueError):
+ table.to_batches(max_chunksize=0)
+
def test_table_basics():
data = [
From 90b30fcbfdfe12fa9ed497c3fa1cfe682b50168f Mon Sep 17 00:00:00 2001
From: Lyndon Shi <9373058+lynshi@users.noreply.github.com>
Date: Fri, 2 Feb 2024 07:15:57 -0800
Subject: [PATCH 27/74] MINOR: [C++][Docs] Fix MapBuilder docstring (#39755)
The [current `MapBuilder` documentation](https://arrow.apache.org/docs/cpp/api/builder.html#_CPPv4N5arrow10MapBuilderE) says:
> To use this class, you must append values to the key and item array builders and use the Append function to delimit each distinct map (once the keys and items have been appended)
This contradicts the [docstring for `Append`](https://arrow.apache.org/docs/cpp/api/builder.html#_CPPv4N5arrow10MapBuilder6AppendEv):
> This function should be called before beginning to append elements to the key and item builders
The `Append` documentation is correct; it should be called *before* keys and items have been appended. If `Append` is called after, as the `MapBuilder` docstring suggests, `Finish` results in an empty `Array`.
### What changes are included in this PR?
Documentation only change.
### Are these changes tested?
There are no behavior changes.
### Are there any user-facing changes?
No
Authored-by: Lyndon Shi <9373058+lynshi@users.noreply.github.com>
Signed-off-by: Benjamin Kietzman
---
cpp/src/arrow/array/builder_nested.h | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h
index 8065752f3e278..429aa5c0488cd 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -515,10 +515,9 @@ class ARROW_EXPORT LargeListViewBuilder final
/// \class MapBuilder
/// \brief Builder class for arrays of variable-size maps
///
-/// To use this class, you must append values to the key and item array builders
-/// and use the Append function to delimit each distinct map (once the keys and items
-/// have been appended) or use the bulk API to append a sequence of offsets and null
-/// maps.
+/// To use this class, you must use the Append function to delimit each distinct
+/// map before appending values to the key and item array builders, or use the
+/// bulk API to append a sequence of offsets and null maps.
///
/// Key uniqueness and ordering are not validated.
class ARROW_EXPORT MapBuilder : public ArrayBuilder {
From 32bd01fa64b275937ca90aa50b11f275eeefde94 Mon Sep 17 00:00:00 2001
From: mwish
Date: Sat, 3 Feb 2024 03:54:47 +0800
Subject: [PATCH 28/74] GH-39843: [C++][Parquet] Parquet binary length overflow
exception should contain the length of binary (#39844)
### Rationale for this change
See https://github.com/apache/arrow/issues/39843
It would be helpful to include the string length in the decoder's error message.
### What changes are included in this PR?
change the logging of encoding
### Are these changes tested?
no
### Are there any user-facing changes?
more specific error logging?
* Closes: #39843
Authored-by: mwish
Signed-off-by: mwish
---
cpp/src/parquet/column_writer.cc | 3 ++-
cpp/src/parquet/encoding.cc | 18 ++++++++++++------
2 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index 23366b2daafd5..eae8fc6125499 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -442,7 +442,8 @@ class SerializedPageWriter : public PageWriter {
if (offset_index_builder_ != nullptr) {
const int64_t compressed_size = output_data_len + header_size;
if (compressed_size > std::numeric_limits::max()) {
- throw ParquetException("Compressed page size overflows INT32_MAX.");
+ throw ParquetException("Compressed page size ", compressed_size,
+ " overflows INT32_MAX.");
}
if (!page.first_row_index().has_value()) {
throw ParquetException("First row index is not set in data page.");
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 5573f5b9aed4c..a3d1746536647 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -160,7 +160,8 @@ class PlainEncoder : public EncoderImpl, virtual public TypedEncoder {
*array.data(),
[&](::std::string_view view) {
if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) {
- return Status::Invalid("Parquet cannot store strings with size 2GB or more");
+ return Status::Invalid(
+ "Parquet cannot store strings with size 2GB or more, got: ", view.size());
}
UnsafePutByteArray(view.data(), static_cast(view.size()));
return Status::OK();
@@ -571,7 +572,8 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder {
*array.data(),
[&](::std::string_view view) {
if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) {
- return Status::Invalid("Parquet cannot store strings with size 2GB or more");
+ return Status::Invalid(
+ "Parquet cannot store strings with size 2GB or more, got: ", view.size());
}
PutByteArray(view.data(), static_cast(view.size()));
return Status::OK();
@@ -585,7 +587,8 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder {
for (int64_t i = 0; i < array.length(); i++) {
auto v = array.GetView(i);
if (ARROW_PREDICT_FALSE(v.size() > kMaxByteArraySize)) {
- throw ParquetException("Parquet cannot store strings with size 2GB or more");
+ throw ParquetException(
+ "Parquet cannot store strings with size 2GB or more, got: ", v.size());
}
dict_encoded_size_ += static_cast(v.size() + sizeof(uint32_t));
int32_t unused_memo_index;
@@ -2671,7 +2674,8 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl,
*array.data(),
[&](::std::string_view view) {
if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) {
- return Status::Invalid("Parquet cannot store strings with size 2GB or more");
+ return Status::Invalid(
+ "Parquet cannot store strings with size 2GB or more, got: ", view.size());
}
length_encoder_.Put({static_cast(view.length())}, 1);
PARQUET_THROW_NOT_OK(sink_.Append(view.data(), view.length()));
@@ -3200,7 +3204,8 @@ class DeltaByteArrayEncoder : public EncoderImpl, virtual public TypedEncoder= kMaxByteArraySize)) {
- return Status::Invalid("Parquet cannot store strings with size 2GB or more");
+ return Status::Invalid(
+ "Parquet cannot store strings with size 2GB or more, got: ", view.size());
}
const ByteArray src{view};
@@ -3246,7 +3251,8 @@ struct ByteArrayVisitor {
std::string_view operator[](int i) const {
if (ARROW_PREDICT_FALSE(src[i].len >= kMaxByteArraySize)) {
- throw ParquetException("Parquet cannot store strings with size 2GB or more");
+ throw ParquetException("Parquet cannot store strings with size 2GB or more, got: ",
+ src[i].len);
}
return std::string_view{src[i]};
}
From 0fb00fdea7a9541ac8df8a4f784af1dfd0adb056 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon
Date: Sat, 3 Feb 2024 01:45:40 +0530
Subject: [PATCH 29/74] GH-39734: [Java] Bump
org.codehaus.mojo:exec-maven-plugin from 1.6.0 to 3.1.1 (#39696)
### Rationale for this change
This PR was created to replace https://github.com/apache/arrow/pull/39374 and do the necessary changes for `org.codehaus.mojo` upgrade to take place.
### What changes are included in this PR?
The changes to the `org.codehaus.mojo` version and an upgrade on the maven version used in the `.env`.
### Are these changes tested?
Tested locally, but this requires a CI verification on Java.
### Are there any user-facing changes?
No
Authored-by: vibhatha
Signed-off-by: David Li
---
.env | 2 +-
docker-compose.yml | 12 +++---------
java/performance/pom.xml | 2 +-
java/pom.xml | 6 +++---
4 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/.env b/.env
index 6746892fd4ed8..427a4ab0bf398 100644
--- a/.env
+++ b/.env
@@ -65,7 +65,7 @@ JDK=8
KARTOTHEK=latest
# LLVM 12 and GCC 11 reports -Wmismatched-new-delete.
LLVM=14
-MAVEN=3.5.4
+MAVEN=3.6.3
NODE=18
NUMBA=latest
NUMPY=latest
diff --git a/docker-compose.yml b/docker-compose.yml
index a08345c198fa0..0252c4ec8a896 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1709,9 +1709,7 @@ services:
arch: ${ARCH}
# Use a newer JDK as it seems to improve stability
jdk: 17
- # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should
- # be set to ${MAVEN}
- maven: 3.5
+ maven: ${MAVEN}
node: ${NODE}
go: ${GO}
volumes: *conda-volumes
@@ -1843,9 +1841,7 @@ services:
arch: ${ARCH}
python: ${PYTHON}
jdk: ${JDK}
- # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should
- # be set to ${MAVEN}
- maven: 3.5
+ maven: ${MAVEN}
hdfs: ${HDFS}
links:
- impala:impala
@@ -1886,9 +1882,7 @@ services:
arch: ${ARCH}
python: ${PYTHON}
jdk: ${JDK}
- # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should
- # be set to ${MAVEN}
- maven: 3.5
+ maven: ${MAVEN}
spark: ${SPARK}
numpy: ${NUMPY}
shm_size: *shm-size
diff --git a/java/performance/pom.xml b/java/performance/pom.xml
index a1d53171f549b..ba5a6616dca77 100644
--- a/java/performance/pom.xml
+++ b/java/performance/pom.xml
@@ -139,7 +139,7 @@
org.codehaus.mojo
exec-maven-plugin
- 1.6.0
+ 3.1.1
run-java-benchmarks
diff --git a/java/pom.xml b/java/pom.xml
index 3e595648ed085..7871303634976 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -1038,7 +1038,7 @@
org.codehaus.mojo
exec-maven-plugin
- 3.1.0
+ 3.1.1
cdata-cmake
@@ -1099,7 +1099,7 @@
org.codehaus.mojo
exec-maven-plugin
- 3.1.0
+ 3.1.1
jni-cpp-cmake
@@ -1214,7 +1214,7 @@
org.codehaus.mojo
exec-maven-plugin
- 3.1.0
+ 3.1.1
jni-cpp-cmake
From 22f2cfd1e1ebe49016b6d97c49f494287a98d02f Mon Sep 17 00:00:00 2001
From: Divyansh200102 <146909065+Divyansh200102@users.noreply.github.com>
Date: Sat, 3 Feb 2024 16:29:49 +0530
Subject: [PATCH 30/74] GH-39416: [GLib][Docs] Fixed Broken Link in README
Content (#39896)
### Rationale for this change
### What changes are included in this PR?
Fixed Broken Link in README Content
### Are these changes tested?
Yes
### Are there any user-facing changes?
Yes
* Closes: #39416
Lead-authored-by: Divyansh200102
Co-authored-by: Divyansh200102 <146909065+Divyansh200102@users.noreply.github.com>
Co-authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
c_glib/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/c_glib/README.md b/c_glib/README.md
index 2a4d6b8a6628c..24e69eff65055 100644
--- a/c_glib/README.md
+++ b/c_glib/README.md
@@ -101,7 +101,7 @@ $ sudo meson install -C c_glib.build
You need to install Arrow C++ before you install Arrow GLib. See Arrow
C++ document about how to install Arrow C++.
-You need [GTK-Doc](https://www.gtk.org/gtk-doc/) and
+You need [GTK-Doc](https://gitlab.gnome.org/GNOME/gtk-doc) and
[GObject Introspection](https://wiki.gnome.org/Projects/GObjectIntrospection)
to build Arrow GLib. You can install them by the followings:
From aded7bf37686a16fc4b0649ab97231427a219d7b Mon Sep 17 00:00:00 2001
From: david dali susanibar arce
Date: Sun, 4 Feb 2024 01:37:36 -0500
Subject: [PATCH 31/74] GH-39909: [Java][CI] Update reference to Float16
testing file reference on Testing submodule (#39911)
### Rationale for this change
Update reference to Float16 testing file reference on Testing submodule.
### What changes are included in this PR?
Testing submodule. changes.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* Closes: #39909
Authored-by: david dali susanibar arce
Signed-off-by: Sutou Kouhei
---
testing | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/testing b/testing
index ad82a736c170e..25d16511e8d42 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit ad82a736c170e97b7c8c035ebd8a801c17eec170
+Subproject commit 25d16511e8d42c2744a1d94d90169e3a36e92631
From 585e0a252f327e7136695f586b187b2ba5a3a1e3 Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Mon, 5 Feb 2024 05:55:54 +0800
Subject: [PATCH 32/74] MINOR: [C++][Parquet] Remove undefined GetArrowType
from schema_internal.h (#39931)
### Rationale for this change
We have redundant declarations below and the 1st one should be removed:
```cpp
Result> GetArrowType(Type::type physical_type,
const LogicalType& logical_type,
int type_length);
Result> GetArrowType(
Type::type physical_type, const LogicalType& logical_type, int type_length,
::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO);
```
### What changes are included in this PR?
Remove the redundant function declaration described above.
### Are these changes tested?
Make sure build and test pass.
### Are there any user-facing changes?
No.
Authored-by: Gang Wu
Signed-off-by: Sutou Kouhei
---
cpp/src/parquet/arrow/schema_internal.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/cpp/src/parquet/arrow/schema_internal.h b/cpp/src/parquet/arrow/schema_internal.h
index 55292ac35ab9c..f56ba0958ae2d 100644
--- a/cpp/src/parquet/arrow/schema_internal.h
+++ b/cpp/src/parquet/arrow/schema_internal.h
@@ -34,10 +34,6 @@ Result> FromFLBA(const LogicalType& logical_t
Result> FromInt32(const LogicalType& logical_type);
Result> FromInt64(const LogicalType& logical_type);
-Result> GetArrowType(Type::type physical_type,
- const LogicalType& logical_type,
- int type_length);
-
Result> GetArrowType(
Type::type physical_type, const LogicalType& logical_type, int type_length,
::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO);
From ed78986aa6971484f40a5780922128636a47d175 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Mon, 5 Feb 2024 11:51:04 +0900
Subject: [PATCH 33/74] GH-39928: [C++][Gandiva] Accept LLVM 18 (#39934)
### Rationale for this change
LLVM 18.1 will be released soon.
### What changes are included in this PR?
Accept LLVM 18.1.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* Closes: #39928
Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
cpp/CMakeLists.txt | 1 +
cpp/src/gandiva/engine.cc | 13 ++++++++++++-
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 016cd8a1b9ec8..50a85b33d5489 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -152,6 +152,7 @@ set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}")
set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")
set(ARROW_LLVM_VERSIONS
+ "18.1"
"17.0"
"16.0"
"15.0"
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index fc047f2ac0763..bfce72cefc630 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -62,7 +62,11 @@
#endif
#include
#include
+#if LLVM_VERSION_MAJOR >= 18
+#include
+#else
#include
+#endif
#include
#include
#if LLVM_VERSION_MAJOR >= 14
@@ -86,7 +90,9 @@
#include
#include
#include
+#if LLVM_VERSION_MAJOR <= 17
#include
+#endif
// JITLink is available in LLVM 9+
// but the `InProcessMemoryManager::Create` API was added since LLVM 14
@@ -132,8 +138,13 @@ Result MakeTargetMachineBuilder(
jtmb.setCPU(cpu_name.str());
jtmb.addFeatures(cpu_attrs);
}
+#if LLVM_VERSION_MAJOR >= 18
+ using CodeGenOptLevel = llvm::CodeGenOptLevel;
+#else
+ using CodeGenOptLevel = llvm::CodeGenOpt::Level;
+#endif
auto const opt_level =
- conf.optimize() ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None;
+ conf.optimize() ? CodeGenOptLevel::Aggressive : CodeGenOptLevel::None;
jtmb.setCodeGenOptLevel(opt_level);
return jtmb;
}
From 5856421e31b163104570d0305cb79f323cf488a6 Mon Sep 17 00:00:00 2001
From: mwish
Date: Mon, 5 Feb 2024 23:14:48 +0800
Subject: [PATCH 34/74] GH-39921: [Go][Parquet] ColumnWriter not reset
TotalCompressedBytes after Flush (#39922)
### Rationale for this change
See https://github.com/apache/arrow/issues/39921
### What changes are included in this PR?
Do not clear `totalCompressedBytes` when flush is called.
### Are these changes tested?
Yes
### Are there any user-facing changes?
Yes, it's a bugfix
* Closes: #39921
Authored-by: mwish
Signed-off-by: Matt Topol
---
go/parquet/file/column_writer.go | 5 +++--
go/parquet/file/column_writer_test.go | 28 +++++++++++++++++++++++++++
2 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go
index ac857d17e632d..36663b10b89dd 100755
--- a/go/parquet/file/column_writer.go
+++ b/go/parquet/file/column_writer.go
@@ -397,7 +397,6 @@ func (w *columnWriter) FlushBufferedDataPages() (err error) {
}
}
w.pages = w.pages[:0]
- w.totalCompressedBytes = 0
return
}
@@ -542,7 +541,9 @@ func (w *columnWriter) Close() (err error) {
if !w.closed {
w.closed = true
if w.hasDict && !w.fallbackToNonDict {
- w.WriteDictionaryPage()
+ if err = w.WriteDictionaryPage(); err != nil {
+ return err
+ }
}
if err = w.FlushBufferedDataPages(); err != nil {
diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go
index 8011ac2487995..321e7b730d165 100755
--- a/go/parquet/file/column_writer_test.go
+++ b/go/parquet/file/column_writer_test.go
@@ -426,6 +426,26 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackEncoding(version parque
}
}
+func (p *PrimitiveWriterTestSuite) testDictionaryFallbackAndCompressedSize(version parquet.Version) {
+ p.GenerateData(SmallSize)
+ props := parquet.DefaultColumnProperties()
+ props.DictionaryEnabled = true
+
+ if version == parquet.V1_0 {
+ props.Encoding = parquet.Encodings.PlainDict
+ } else {
+ props.Encoding = parquet.Encodings.RLEDict
+ }
+
+ writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version))
+ p.WriteBatchValues(writer, nil, nil)
+ writer.FallbackToPlain()
+ p.NotEqual(0, writer.TotalCompressedBytes())
+ writer.Close()
+ p.NotEqual(0, writer.TotalCompressedBytes())
+ p.NotEqual(0, writer.TotalBytesWritten())
+}
+
func (p *PrimitiveWriterTestSuite) TestRequiredPlain() {
p.testRequiredWithEncoding(parquet.Encodings.Plain)
}
@@ -575,6 +595,14 @@ func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackEncodingV2() {
p.testDictionaryFallbackEncoding(parquet.V2_LATEST)
}
+func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackStatsV1() {
+ p.testDictionaryFallbackAndCompressedSize(parquet.V1_0)
+}
+
+func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackStatsV2() {
+ p.testDictionaryFallbackAndCompressedSize(parquet.V2_LATEST)
+}
+
func (p *PrimitiveWriterTestSuite) TestOptionalNullValueChunk() {
// test case for NULL values
p.SetupSchema(parquet.Repetitions.Optional, 1)
From 85e2a684b79b560929085c7f8e27586fa6d0b1ff Mon Sep 17 00:00:00 2001
From: Elliot Morrison-Reed
Date: Mon, 5 Feb 2024 10:45:46 -0500
Subject: [PATCH 35/74] GH-39925: [Go][Parquet] Fix re-slicing in
maybeReplaceValidity function (#39926)
### Rationale for this change
See #39925.
### What changes are included in this PR?
Fixes re-slicing logic for multiple data-types and negative length bug.
### Are these changes tested?
There is a new test in the PR.
### Are there any user-facing changes?
No, it just fixes a bug.
* Closes: #39925
Authored-by: Morrison-Reed Elliot (BEG/EVS1-NA)
Signed-off-by: Matt Topol
---
go/parquet/file/column_writer.go | 5 +++-
go/parquet/file/column_writer_test.go | 38 +++++++++++++++++++++++++++
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go
index 36663b10b89dd..4d603c547ca6a 100755
--- a/go/parquet/file/column_writer.go
+++ b/go/parquet/file/column_writer.go
@@ -660,7 +660,10 @@ func (w *columnWriter) maybeReplaceValidity(values arrow.Array, newNullCount int
if values.Data().Offset() > 0 {
data := values.Data()
- buffers[1] = memory.NewBufferBytes(data.Buffers()[1].Bytes()[data.Offset()*arrow.Int32SizeBytes : data.Len()*arrow.Int32SizeBytes])
+ elemSize := data.DataType().(arrow.FixedWidthDataType).Bytes()
+ start := data.Offset() * elemSize
+ end := start + data.Len()*elemSize
+ buffers[1] = memory.NewBufferBytes(data.Buffers()[1].Bytes()[start:end])
}
data := array.NewData(values.DataType(), values.Len(), buffers, nil, int(newNullCount), 0)
diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go
index 321e7b730d165..dd597e280b850 100755
--- a/go/parquet/file/column_writer_test.go
+++ b/go/parquet/file/column_writer_test.go
@@ -24,6 +24,8 @@ import (
"sync"
"testing"
+ "github.com/apache/arrow/go/v16/arrow"
+ "github.com/apache/arrow/go/v16/arrow/array"
"github.com/apache/arrow/go/v16/arrow/bitutil"
"github.com/apache/arrow/go/v16/arrow/memory"
arrutils "github.com/apache/arrow/go/v16/internal/utils"
@@ -36,6 +38,7 @@ import (
"github.com/apache/arrow/go/v16/parquet/internal/testutils"
"github.com/apache/arrow/go/v16/parquet/internal/utils"
"github.com/apache/arrow/go/v16/parquet/metadata"
+ "github.com/apache/arrow/go/v16/parquet/pqarrow"
"github.com/apache/arrow/go/v16/parquet/schema"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
@@ -736,3 +739,38 @@ func (b *BooleanValueWriterSuite) TestAlternateBooleanValues() {
b.Equal(i%2 == 0, b.ValuesOut.([]bool)[i])
}
}
+
+func TestDictionaryReslice(t *testing.T) {
+ pts := []arrow.DataType{
+ arrow.PrimitiveTypes.Int8,
+ arrow.PrimitiveTypes.Int16,
+ arrow.PrimitiveTypes.Int32,
+ arrow.PrimitiveTypes.Int64,
+ arrow.PrimitiveTypes.Uint8,
+ arrow.PrimitiveTypes.Uint16,
+ arrow.PrimitiveTypes.Uint32,
+ arrow.PrimitiveTypes.Uint64,
+ }
+ for _, pt := range pts {
+ t.Run(pt.String(), func(t *testing.T) {
+ mem := memory.NewGoAllocator()
+ dt := &arrow.DictionaryType{
+ IndexType: pt,
+ ValueType: &arrow.StringType{},
+ }
+ field := arrow.Field{Name: "test_field", Type: dt, Nullable: true}
+ schema := arrow.NewSchema([]arrow.Field{field}, nil)
+ b := array.NewRecordBuilder(mem, schema)
+ for i := 0; i < 2000; i++ {
+ b.Field(0).(*array.BinaryDictionaryBuilder).AppendString("test_value")
+ }
+ rec := b.NewRecord()
+ out := &bytes.Buffer{}
+ pqw, err := pqarrow.NewFileWriter(rec.Schema(), out, nil, pqarrow.NewArrowWriterProperties())
+ assert.NoError(t, err)
+ err = pqw.WriteBuffered(rec)
+ assert.NoError(t, err)
+
+ })
+ }
+}
From 56951fee35c920ac898c2515896ff3bd752dde97 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Mon, 5 Feb 2024 17:15:44 +0100
Subject: [PATCH 36/74] GH-39865: [C++] Strip extension metadata when importing
a registered extension (#39866)
### Rationale for this change
When importing an extension type from the C Data Interface and the extension type is registered, we would still leave the extension-related metadata on the storage type.
### What changes are included in this PR?
Strip extension-related metadata on the storage type if we succeed in recreating the extension type.
This matches the behavior of the IPC layer and allows for more exact roundtripping.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No, unless people mistakenly rely on the presence of said metadata.
* Closes: #39865
Authored-by: Antoine Pitrou
Signed-off-by: Antoine Pitrou
---
cpp/src/arrow/c/bridge.cc | 6 +++
cpp/src/arrow/c/bridge_test.cc | 48 ++++++++++++++++--------
cpp/src/arrow/util/key_value_metadata.cc | 18 ++++-----
cpp/src/arrow/util/key_value_metadata.h | 11 +++---
4 files changed, 52 insertions(+), 31 deletions(-)
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 172ed8962ce77..9b165a10a61e7 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -914,6 +914,8 @@ struct DecodedMetadata {
std::shared_ptr metadata;
std::string extension_name;
std::string extension_serialized;
+ int extension_name_index = -1; // index of extension_name in metadata
+ int extension_serialized_index = -1; // index of extension_serialized in metadata
};
Result DecodeMetadata(const char* metadata) {
@@ -956,8 +958,10 @@ Result DecodeMetadata(const char* metadata) {
RETURN_NOT_OK(read_string(&values[i]));
if (keys[i] == kExtensionTypeKeyName) {
decoded.extension_name = values[i];
+ decoded.extension_name_index = i;
} else if (keys[i] == kExtensionMetadataKeyName) {
decoded.extension_serialized = values[i];
+ decoded.extension_serialized_index = i;
}
}
decoded.metadata = key_value_metadata(std::move(keys), std::move(values));
@@ -1046,6 +1050,8 @@ struct SchemaImporter {
ARROW_ASSIGN_OR_RAISE(
type_, registered_ext_type->Deserialize(std::move(type_),
metadata_.extension_serialized));
+ RETURN_NOT_OK(metadata_.metadata->DeleteMany(
+ {metadata_.extension_name_index, metadata_.extension_serialized_index}));
}
}
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 321ec36c38d8c..8b67027454c55 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -1872,7 +1872,7 @@ class TestSchemaImport : public ::testing::Test, public SchemaStructBuilder {
ASSERT_TRUE(ArrowSchemaIsReleased(&c_struct_));
Reset(); // for further tests
cb.AssertCalled(); // was released
- AssertTypeEqual(*expected, *type);
+ AssertTypeEqual(*expected, *type, /*check_metadata=*/true);
}
void CheckImport(const std::shared_ptr& expected) {
@@ -1892,7 +1892,7 @@ class TestSchemaImport : public ::testing::Test, public SchemaStructBuilder {
ASSERT_TRUE(ArrowSchemaIsReleased(&c_struct_));
Reset(); // for further tests
cb.AssertCalled(); // was released
- AssertSchemaEqual(*expected, *schema);
+ AssertSchemaEqual(*expected, *schema, /*check_metadata=*/true);
}
void CheckImportError() {
@@ -3571,7 +3571,7 @@ class TestSchemaRoundtrip : public ::testing::Test {
// Recreate the type
ASSERT_OK_AND_ASSIGN(actual, ImportType(&c_schema));
type = factory_expected();
- AssertTypeEqual(*type, *actual);
+ AssertTypeEqual(*type, *actual, /*check_metadata=*/true);
type.reset();
actual.reset();
@@ -3602,7 +3602,7 @@ class TestSchemaRoundtrip : public ::testing::Test {
// Recreate the schema
ASSERT_OK_AND_ASSIGN(actual, ImportSchema(&c_schema));
schema = factory();
- AssertSchemaEqual(*schema, *actual);
+ AssertSchemaEqual(*schema, *actual, /*check_metadata=*/true);
schema.reset();
actual.reset();
@@ -3695,13 +3695,27 @@ TEST_F(TestSchemaRoundtrip, Dictionary) {
}
}
+// Given an extension type, return a field of its storage type + the
+// serialized extension metadata.
+std::shared_ptr GetStorageWithMetadata(const std::string& field_name,
+ const std::shared_ptr& type) {
+ const auto& ext_type = checked_cast(*type);
+ auto storage_type = ext_type.storage_type();
+ auto md = KeyValueMetadata::Make({kExtensionTypeKeyName, kExtensionMetadataKeyName},
+ {ext_type.extension_name(), ext_type.Serialize()});
+ return field(field_name, storage_type, /*nullable=*/true, md);
+}
+
TEST_F(TestSchemaRoundtrip, UnregisteredExtension) {
TestWithTypeFactory(uuid, []() { return fixed_size_binary(16); });
TestWithTypeFactory(dict_extension_type, []() { return dictionary(int8(), utf8()); });
- // Inside nested type
- TestWithTypeFactory([]() { return list(dict_extension_type()); },
- []() { return list(dictionary(int8(), utf8())); });
+ // Inside nested type.
+ // When an extension type is not known by the importer, it is imported
+ // as its storage type and the extension metadata is preserved on the field.
+ TestWithTypeFactory(
+ []() { return list(dict_extension_type()); },
+ []() { return list(GetStorageWithMetadata("item", dict_extension_type())); });
}
TEST_F(TestSchemaRoundtrip, RegisteredExtension) {
@@ -3710,7 +3724,9 @@ TEST_F(TestSchemaRoundtrip, RegisteredExtension) {
TestWithTypeFactory(dict_extension_type);
TestWithTypeFactory(complex128);
- // Inside nested type
+ // Inside nested type.
+ // When the extension type is registered, the extension metadata is removed
+ // from the storage type's field to ensure roundtripping (GH-39865).
TestWithTypeFactory([]() { return list(uuid()); });
TestWithTypeFactory([]() { return list(dict_extension_type()); });
TestWithTypeFactory([]() { return list(complex128()); });
@@ -3810,7 +3826,7 @@ class TestArrayRoundtrip : public ::testing::Test {
{
std::shared_ptr expected;
ASSERT_OK_AND_ASSIGN(expected, ToResult(factory_expected()));
- AssertTypeEqual(*expected->type(), *array->type());
+ AssertTypeEqual(*expected->type(), *array->type(), /*check_metadata=*/true);
AssertArraysEqual(*expected, *array, true);
}
array.reset();
@@ -3850,7 +3866,7 @@ class TestArrayRoundtrip : public ::testing::Test {
{
std::shared_ptr expected;
ASSERT_OK_AND_ASSIGN(expected, ToResult(factory()));
- AssertSchemaEqual(*expected->schema(), *batch->schema());
+ AssertSchemaEqual(*expected->schema(), *batch->schema(), /*check_metadata=*/true);
AssertBatchesEqual(*expected, *batch);
}
batch.reset();
@@ -4230,7 +4246,7 @@ class TestDeviceArrayRoundtrip : public ::testing::Test {
{
std::shared_ptr expected;
ASSERT_OK_AND_ASSIGN(expected, ToResult(factory_expected()));
- AssertTypeEqual(*expected->type(), *array->type());
+ AssertTypeEqual(*expected->type(), *array->type(), /*check_metadata=*/true);
AssertArraysEqual(*expected, *array, true);
}
array.reset();
@@ -4276,7 +4292,7 @@ class TestDeviceArrayRoundtrip : public ::testing::Test {
{
std::shared_ptr expected;
ASSERT_OK_AND_ASSIGN(expected, ToResult(factory()));
- AssertSchemaEqual(*expected->schema(), *batch->schema());
+ AssertSchemaEqual(*expected->schema(), *batch->schema(), /*check_metadata=*/true);
AssertBatchesEqual(*expected, *batch);
}
batch.reset();
@@ -4353,7 +4369,7 @@ class TestArrayStreamExport : public BaseArrayStreamTest {
SchemaExportGuard schema_guard(&c_schema);
ASSERT_FALSE(ArrowSchemaIsReleased(&c_schema));
ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema));
- AssertSchemaEqual(expected, *schema);
+ AssertSchemaEqual(expected, *schema, /*check_metadata=*/true);
}
void AssertStreamEnd(struct ArrowArrayStream* c_stream) {
@@ -4437,7 +4453,7 @@ TEST_F(TestArrayStreamExport, ArrayLifetime) {
{
SchemaExportGuard schema_guard(&c_schema);
ASSERT_OK_AND_ASSIGN(auto got_schema, ImportSchema(&c_schema));
- AssertSchemaEqual(*schema, *got_schema);
+ AssertSchemaEqual(*schema, *got_schema, /*check_metadata=*/true);
}
ASSERT_GT(pool_->bytes_allocated(), orig_allocated_);
@@ -4462,7 +4478,7 @@ TEST_F(TestArrayStreamExport, Errors) {
{
SchemaExportGuard schema_guard(&c_schema);
ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema));
- AssertSchemaEqual(schema, arrow::schema({}));
+ AssertSchemaEqual(schema, arrow::schema({}), /*check_metadata=*/true);
}
struct ArrowArray c_array;
@@ -4539,7 +4555,7 @@ TEST_F(TestArrayStreamRoundtrip, Simple) {
ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, orig_schema));
Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) {
- AssertSchemaEqual(*orig_schema, *reader->schema());
+ AssertSchemaEqual(*orig_schema, *reader->schema(), /*check_metadata=*/true);
AssertReaderNext(reader, *batches[0]);
AssertReaderNext(reader, *batches[1]);
AssertReaderEnd(reader);
diff --git a/cpp/src/arrow/util/key_value_metadata.cc b/cpp/src/arrow/util/key_value_metadata.cc
index bc48ae76c2a2f..002e8b0975094 100644
--- a/cpp/src/arrow/util/key_value_metadata.cc
+++ b/cpp/src/arrow/util/key_value_metadata.cc
@@ -90,7 +90,7 @@ void KeyValueMetadata::Append(std::string key, std::string value) {
values_.push_back(std::move(value));
}
-Result<std::string> KeyValueMetadata::Get(const std::string& key) const {
+Result<std::string> KeyValueMetadata::Get(std::string_view key) const {
auto index = FindKey(key);
if (index < 0) {
return Status::KeyError(key);
@@ -129,7 +129,7 @@ Status KeyValueMetadata::DeleteMany(std::vector indices) {
return Status::OK();
}
-Status KeyValueMetadata::Delete(const std::string& key) {
+Status KeyValueMetadata::Delete(std::string_view key) {
auto index = FindKey(key);
if (index < 0) {
return Status::KeyError(key);
@@ -138,20 +138,18 @@ Status KeyValueMetadata::Delete(const std::string& key) {
}
}
-Status KeyValueMetadata::Set(const std::string& key, const std::string& value) {
+Status KeyValueMetadata::Set(std::string key, std::string value) {
auto index = FindKey(key);
if (index < 0) {
- Append(key, value);
+ Append(std::move(key), std::move(value));
} else {
- keys_[index] = key;
- values_[index] = value;
+ keys_[index] = std::move(key);
+ values_[index] = std::move(value);
}
return Status::OK();
}
-bool KeyValueMetadata::Contains(const std::string& key) const {
- return FindKey(key) >= 0;
-}
+bool KeyValueMetadata::Contains(std::string_view key) const { return FindKey(key) >= 0; }
void KeyValueMetadata::reserve(int64_t n) {
DCHECK_GE(n, 0);
@@ -188,7 +186,7 @@ std::vector> KeyValueMetadata::sorted_pairs(
return pairs;
}
-int KeyValueMetadata::FindKey(const std::string& key) const {
+int KeyValueMetadata::FindKey(std::string_view key) const {
for (size_t i = 0; i < keys_.size(); ++i) {
if (keys_[i] == key) {
return static_cast(i);
diff --git a/cpp/src/arrow/util/key_value_metadata.h b/cpp/src/arrow/util/key_value_metadata.h
index 8702ce73a639a..57ade11e75868 100644
--- a/cpp/src/arrow/util/key_value_metadata.h
+++ b/cpp/src/arrow/util/key_value_metadata.h
@@ -20,6 +20,7 @@
#include
#include
#include
+#include <string_view>
#include
#include
#include
@@ -44,13 +45,13 @@ class ARROW_EXPORT KeyValueMetadata {
void ToUnorderedMap(std::unordered_map* out) const;
void Append(std::string key, std::string value);
-  Result<std::string> Get(const std::string& key) const;
-  bool Contains(const std::string& key) const;
+  Result<std::string> Get(std::string_view key) const;
+  bool Contains(std::string_view key) const;
// Note that deleting may invalidate known indices
- Status Delete(const std::string& key);
+ Status Delete(std::string_view key);
Status Delete(int64_t index);
Status DeleteMany(std::vector<int64_t> indices);
- Status Set(const std::string& key, const std::string& value);
+ Status Set(std::string key, std::string value);
void reserve(int64_t n);
@@ -63,7 +64,7 @@ class ARROW_EXPORT KeyValueMetadata {
std::vector> sorted_pairs() const;
/// \brief Perform linear search for key, returning -1 if not found
- int FindKey(const std::string& key) const;
+ int FindKey(std::string_view key) const;
std::shared_ptr Copy() const;
From cb5c109a5d6985264203e256ddae0b210251e820 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon
Date: Mon, 5 Feb 2024 22:23:50 +0530
Subject: [PATCH 37/74] GH-39946: [Java] Bump com.puppycrawl.tools:checkstyle
from 8.19 to 8.29 (#39694)
### Rationale for this change
This PR was created in place of https://github.com/apache/arrow/pull/39202 to integrate the `puppycrawl.tools.checkstyle` upgrade.
### What changes are included in this PR?
Style changes in Java classes and core changes to the style format itself.
Some unsupported attributes have been removed, and some attributes have
been reorganized according to the guidelines provided in the documentation.
### Are these changes tested?
N/A
Tested by the existing checkstyle guidelines.
### Are there any user-facing changes?
No
* Closes: #39946
Lead-authored-by: Vibhatha Lakmal Abeykoon
Co-authored-by: vibhatha
Signed-off-by: David Li
---
.../apache/arrow/adapter/jdbc/Constants.java | 3 +-
.../adapter/jdbc/MockPreparedStatement.java | 63 ++++++++++++-------
.../arrow/adapter/jdbc/ResultSetUtility.java | 3 +-
.../apache/arrow/adapter/orc/OrcJniUtils.java | 3 +-
java/dev/checkstyle/checkstyle.xml | 18 +++---
java/dev/checkstyle/suppressions.xml | 2 +-
.../org/apache/arrow/flight/FlightClient.java | 3 +-
.../apache/arrow/flight/FlightGrpcUtils.java | 3 +-
.../org/apache/arrow/flight/FlightStream.java | 3 +-
.../arrow/flight/OutboundStreamListener.java | 3 +-
.../arrow/flight/auth/AuthConstants.java | 3 +-
.../arrow/flight/auth/ServerAuthWrapper.java | 4 +-
.../arrow/flight/TestClientMiddleware.java | 9 ++-
.../integration/tests/OrderedScenario.java | 3 +-
.../jdbc/utils/IntervalStringUtils.java | 3 +-
.../utils/ClientAuthenticationUtilsTest.java | 2 +-
.../evaluator/ConfigurationBuilder.java | 3 +-
.../gandiva/evaluator/DecimalTypeUtil.java | 3 +-
.../gandiva/expression/ArrowTypeHelper.java | 3 +-
.../arrow/gandiva/expression/TreeBuilder.java | 3 +-
java/maven/pom.xml | 2 +-
.../arrow/memory/AllocationListener.java | 15 +++--
.../apache/arrow/memory/BaseAllocator.java | 24 +++----
.../org/apache/arrow/memory/BufferLedger.java | 32 +++++-----
.../apache/arrow/memory/ReferenceManager.java | 6 +-
.../memory/util/ByteFunctionHelpers.java | 3 +-
.../apache/arrow/memory/util/CommonUtil.java | 3 +-
.../arrow/memory/util/LargeMemoryUtil.java | 3 +-
.../org/apache/arrow/util/Collections2.java | 3 +-
.../org/apache/arrow/util/Preconditions.java | 3 +-
java/pom.xml | 2 +-
.../org/apache/arrow/tools/FileToStream.java | 3 +-
.../apache/arrow/vector/AllocationHelper.java | 3 +-
.../apache/arrow/vector/BitVectorHelper.java | 3 +-
.../arrow/vector/GenerateSampleData.java | 3 +-
.../org/apache/arrow/vector/NullVector.java | 3 +-
.../apache/arrow/vector/compare/Range.java | 3 +-
.../arrow/vector/complex/StateTool.java | 3 +-
.../apache/arrow/vector/ipc/ArrowMagic.java | 3 +-
.../vector/ipc/message/FBSerializables.java | 3 +-
.../apache/arrow/vector/util/DateUtility.java | 3 +-
.../arrow/vector/util/DecimalUtility.java | 3 +-
.../arrow/vector/util/DictionaryUtility.java | 3 +-
.../vector/util/ObjectMapperFactory.java | 3 +-
.../arrow/vector/util/SchemaUtility.java | 3 +-
.../testing/ValueVectorDataPopulator.java | 3 +-
46 files changed, 174 insertions(+), 107 deletions(-)
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java
index 5b01077b17996..f95133fc7e44c 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java
@@ -21,7 +21,8 @@
* String constants used for metadata returned on Vectors.
*/
public class Constants {
- private Constants() {}
+ private Constants() {
+ }
public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME";
public static final String SQL_SCHEMA_NAME_KEY = "SQL_SCHEMA_NAME";
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java
index 438a949b736f1..4478cdfbee6f7 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java
@@ -231,7 +231,8 @@ public void setDate(int parameterIndex, Date x, Calendar cal) throws SQLExceptio
}
@Override
- public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException {}
+ public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException {
+ }
@Override
public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws SQLException {
@@ -241,7 +242,8 @@ public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws S
}
@Override
- public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException {}
+ public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException {
+ }
@Override
public void setURL(int parameterIndex, URL x) throws SQLException {
@@ -259,62 +261,80 @@ public void setRowId(int parameterIndex, RowId x) throws SQLException {
}
@Override
- public void setNString(int parameterIndex, String value) throws SQLException {}
+ public void setNString(int parameterIndex, String value) throws SQLException {
+ }
@Override
public void setNCharacterStream(int parameterIndex, Reader value, long length)
- throws SQLException {}
+ throws SQLException {
+ }
@Override
- public void setNClob(int parameterIndex, NClob value) throws SQLException {}
+ public void setNClob(int parameterIndex, NClob value) throws SQLException {
+ }
@Override
- public void setClob(int parameterIndex, Reader reader, long length) throws SQLException {}
+ public void setClob(int parameterIndex, Reader reader, long length) throws SQLException {
+ }
@Override
public void setBlob(int parameterIndex, InputStream inputStream, long length)
- throws SQLException {}
+ throws SQLException {
+ }
@Override
- public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException {}
+ public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException {
+ }
@Override
- public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException {}
+ public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException {
+ }
@Override
public void setObject(int parameterIndex, Object x, int targetSqlType, int scaleOrLength)
- throws SQLException {}
+ throws SQLException {
+ }
@Override
- public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException {}
+ public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException {
+ }
@Override
- public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException {}
+ public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException {
+ }
@Override
public void setCharacterStream(int parameterIndex, Reader reader, long length)
- throws SQLException {}
+ throws SQLException {
+ }
@Override
- public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException {}
+ public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException {
+ }
@Override
- public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException {}
+ public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException {
+ }
@Override
- public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException {}
+ public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException {
+ }
@Override
- public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException {}
+ public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException {
+ }
@Override
- public void setClob(int parameterIndex, Reader reader) throws SQLException {}
+ public void setClob(int parameterIndex, Reader reader) throws SQLException {
+ }
@Override
- public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException {}
+ public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException {
+ }
@Override
- public void setNClob(int parameterIndex, Reader reader) throws SQLException {}
+ public void setNClob(int parameterIndex, Reader reader) throws SQLException {
+ }
@Override
public ResultSet executeQuery(String sql) throws SQLException {
@@ -327,7 +347,8 @@ public int executeUpdate(String sql) throws SQLException {
}
@Override
- public void close() throws SQLException {}
+ public void close() throws SQLException {
+ }
@Override
public int getMaxFieldSize() throws SQLException {
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
index c712741b51f5b..ccc7681c5bc8b 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
@@ -348,7 +348,8 @@ public static class MockColumnMetaData {
private int displaySize;
- private MockColumnMetaData() {}
+ private MockColumnMetaData() {
+ }
private String getLabel() {
return label;
diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java
index 9b599234bdf51..d61799e990f77 100644
--- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java
+++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java
@@ -32,7 +32,8 @@ class OrcJniUtils {
private static final String LIBRARY_NAME = "arrow_orc_jni";
private static boolean isLoaded = false;
- private OrcJniUtils() {}
+ private OrcJniUtils() {
+ }
static void loadOrcAdapterLibraryFromJar()
throws IOException, IllegalAccessException {
diff --git a/java/dev/checkstyle/checkstyle.xml b/java/dev/checkstyle/checkstyle.xml
index c27f382ddda76..b63a4a9cba1f3 100644
--- a/java/dev/checkstyle/checkstyle.xml
+++ b/java/dev/checkstyle/checkstyle.xml
@@ -60,6 +60,11 @@
+
+
+
+
+
@@ -72,10 +77,6 @@
-
-
-
-
@@ -223,13 +224,12 @@
-
-
-
-
-
+
+
+
+
diff --git a/java/dev/checkstyle/suppressions.xml b/java/dev/checkstyle/suppressions.xml
index 585985bf32dbc..a3536e2ca9212 100644
--- a/java/dev/checkstyle/suppressions.xml
+++ b/java/dev/checkstyle/suppressions.xml
@@ -40,5 +40,5 @@
-
+
diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java
index fc491ebe0df98..8f251a7c7ef07 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java
@@ -437,7 +437,8 @@ public ClientStreamListener getWriter() {
*/
public void getResult() {
// After exchange is complete, make sure stream is drained to propagate errors through reader
- while (reader.next()) { };
+ while (reader.next()) {
+ }
}
/** Shut down the streams in this call. */
diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java
index eb5e492b4cd46..b711d7ef6b5d7 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java
@@ -125,7 +125,8 @@ public void enterIdle() {
}
}
- private FlightGrpcUtils() {}
+ private FlightGrpcUtils() {
+ }
/**
* Creates a Flight service.
diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java
index 7a5a941603ace..84beee7d40564 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java
@@ -194,7 +194,8 @@ public void close() throws Exception {
}
}
// Drain the stream without the lock (as next() implicitly needs the lock)
- while (next()) { }
+ while (next()) {
+ }
} catch (FlightRuntimeException e) {
suppressor = e;
}
diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java
index e80fb41c67273..80ddad90a1d28 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java
@@ -119,5 +119,6 @@ default void start(VectorSchemaRoot root, DictionaryProvider dictionaries) {
* The default value can be toggled globally by setting the JVM property arrow.flight.enable_zero_copy_write
* or the environment variable ARROW_FLIGHT_ENABLE_ZERO_COPY_WRITE.
*/
- default void setUseZeroCopy(boolean enabled) {}
+ default void setUseZeroCopy(boolean enabled) {
+ }
}
diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java
index e3ccdc626d71b..8a37115f1f024 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java
@@ -47,5 +47,6 @@ public byte[] parseBytes(byte[] serialized) {
public static final Context.Key PEER_IDENTITY_KEY = Context.keyWithDefault("arrow-flight-peer-identity", "");
- private AuthConstants() {}
+ private AuthConstants() {
+ }
}
diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java
index ad1a36a935fd7..3647e113cc0f6 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java
@@ -115,7 +115,9 @@ public boolean hasNext() {
@Override
public void onError(Throwable t) {
completed = true;
- while (future == null) {/* busy wait */}
+ while (future == null) {
+ /* busy wait */
+ }
future.cancel(true);
}
diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java
index bcff54bd7f66f..a1fa1f1d18509 100644
--- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java
+++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java
@@ -303,10 +303,12 @@ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
}
@Override
- public void onCallCompleted(CallStatus status) {}
+ public void onCallCompleted(CallStatus status) {
+ }
@Override
- public void onCallErrored(Throwable err) {}
+ public void onCallErrored(Throwable err) {
+ }
}
static class MultiHeaderClientMiddlewareFactory implements FlightClientMiddleware.Factory {
@@ -356,6 +358,7 @@ public void onHeadersReceived(CallHeaders incomingHeaders) {
}
@Override
- public void onCallCompleted(CallStatus status) {}
+ public void onCallCompleted(CallStatus status) {
+ }
}
}
diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java
index b8aa46fb5674a..13238f318eaaa 100644
--- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java
+++ b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java
@@ -55,7 +55,8 @@ public FlightProducer producer(BufferAllocator allocator, Location location) thr
}
@Override
- public void buildServer(FlightServer.Builder builder) throws Exception {}
+ public void buildServer(FlightServer.Builder builder) throws Exception {
+ }
@Override
public void client(BufferAllocator allocator, Location location, FlightClient client)
diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java
index fdf6c508d93b0..de6dccad4a846 100644
--- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java
+++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java
@@ -31,7 +31,8 @@ public final class IntervalStringUtils {
/**
* Constructor Method of class.
*/
- private IntervalStringUtils( ) {}
+ private IntervalStringUtils( ) {
+ }
/**
* Formats a period similar to Oracle INTERVAL YEAR TO MONTH data type
.
diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java
index b7977462e9c01..78d252f7824c3 100644
--- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java
+++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java
@@ -84,7 +84,7 @@ public void testGetDefaultKeyStoreInstancePassword() throws IOException,
keyStoreMockedStatic
.when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit"))
- .thenReturn(keyStoreMock);
+ .thenReturn(keyStoreMock);
KeyStore receiveKeyStore = ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit");
Assert.assertEquals(receiveKeyStore, keyStoreMock);
}
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
index e903b4e873278..fa5d285b90997 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
@@ -43,7 +43,8 @@ public static ConfigOptions getDefault() {
return new ConfigOptions();
}
- public ConfigOptions() {}
+ public ConfigOptions() {
+ }
public ConfigOptions withOptimize(boolean optimize) {
this.optimize = optimize;
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java
index e0c072cfbe52e..703cfaa8be88b 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java
@@ -23,7 +23,8 @@
* Utility methods for working with {@link Decimal} values.
*/
public class DecimalTypeUtil {
- private DecimalTypeUtil() {}
+ private DecimalTypeUtil() {
+ }
/**
* Enum for supported mathematical operations.
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java
index 90f8684b455a8..e7377cc5c9db4 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java
@@ -33,7 +33,8 @@
* Utility methods to convert between Arrow and Gandiva types.
*/
public class ArrowTypeHelper {
- private ArrowTypeHelper() {}
+ private ArrowTypeHelper() {
+ }
static final int WIDTH_8 = 8;
static final int WIDTH_16 = 16;
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
index 8656e886aae24..3d2ea27d044e7 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
@@ -29,7 +29,8 @@
* Contains helper functions for constructing expression trees.
*/
public class TreeBuilder {
- private TreeBuilder() {}
+ private TreeBuilder() {
+ }
/**
* Helper functions to create literal constants.
diff --git a/java/maven/pom.xml b/java/maven/pom.xml
index 3a88ec762e19c..7fdca7db7b8d8 100644
--- a/java/maven/pom.xml
+++ b/java/maven/pom.xml
@@ -235,7 +235,7 @@
com.puppycrawl.tools
checkstyle
- 8.19
+ 8.29
org.slf4j
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java
index ff2b25dfa30ab..b8de6d819eaf8 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java
@@ -34,7 +34,8 @@ public interface AllocationListener {
*
* @param size the buffer size being allocated
*/
- default void onPreAllocation(long size) {}
+ default void onPreAllocation(long size) {
+ }
/**
* Called each time a new buffer has been allocated.
@@ -43,7 +44,8 @@ default void onPreAllocation(long size) {}
*
* @param size the buffer size being allocated
*/
- default void onAllocation(long size) {}
+ default void onAllocation(long size) {
+ }
/**
* Informed each time a buffer is released from allocation.
@@ -51,7 +53,8 @@ default void onAllocation(long size) {}
* An exception cannot be thrown by this method.
* @param size The size of the buffer being released.
*/
- default void onRelease(long size) {}
+ default void onRelease(long size) {
+ }
/**
@@ -73,7 +76,8 @@ default boolean onFailedAllocation(long size, AllocationOutcome outcome) {
* @param parentAllocator The parent allocator to which a child was added
* @param childAllocator The child allocator that was just added
*/
- default void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) {}
+ default void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) {
+ }
/**
* Called immediately after a child allocator was removed from the parent allocator.
@@ -81,5 +85,6 @@ default void onChildAdded(BufferAllocator parentAllocator, BufferAllocator child
* @param parentAllocator The parent allocator from which a child was removed
* @param childAllocator The child allocator that was just removed
*/
- default void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) {}
+ default void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) {
+ }
}
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java
index 8779c7a3434ea..189c800ba0fe5 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java
@@ -702,18 +702,18 @@ private void verifyAllocator(
void print(StringBuilder sb, int level, Verbosity verbosity) {
CommonUtil.indent(sb, level)
- .append("Allocator(")
- .append(name)
- .append(") ")
- .append(reservation)
- .append('/')
- .append(getAllocatedMemory())
- .append('/')
- .append(getPeakMemoryAllocation())
- .append('/')
- .append(getLimit())
- .append(" (res/actual/peak/limit)")
- .append('\n');
+ .append("Allocator(")
+ .append(name)
+ .append(") ")
+ .append(reservation)
+ .append('/')
+ .append(getAllocatedMemory())
+ .append('/')
+ .append(getPeakMemoryAllocation())
+ .append('/')
+ .append(getLimit())
+ .append(" (res/actual/peak/limit)")
+ .append('\n');
if (DEBUG) {
CommonUtil.indent(sb, level + 1).append(String.format("child allocators: %d\n", childAllocators.size()));
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java
index 1ca3e08ecf046..62d268a1f4493 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java
@@ -478,20 +478,20 @@ public long getAccountedSize() {
*/
void print(StringBuilder sb, int indent, BaseAllocator.Verbosity verbosity) {
CommonUtil.indent(sb, indent)
- .append("ledger[")
- .append(ledgerId)
- .append("] allocator: ")
- .append(allocator.getName())
- .append("), isOwning: ")
- .append(", size: ")
- .append(", references: ")
- .append(bufRefCnt.get())
- .append(", life: ")
- .append(lCreationTime)
- .append("..")
- .append(lDestructionTime)
- .append(", allocatorManager: [")
- .append(", life: ");
+ .append("ledger[")
+ .append(ledgerId)
+ .append("] allocator: ")
+ .append(allocator.getName())
+ .append("), isOwning: ")
+ .append(", size: ")
+ .append(", references: ")
+ .append(bufRefCnt.get())
+ .append(", life: ")
+ .append(lCreationTime)
+ .append("..")
+ .append(lDestructionTime)
+ .append(", allocatorManager: [")
+ .append(", life: ");
if (!BaseAllocator.DEBUG) {
sb.append("]\n");
@@ -499,8 +499,8 @@ void print(StringBuilder sb, int indent, BaseAllocator.Verbosity verbosity) {
Preconditions.checkArgument(buffers != null, "IdentityHashMap of buffers must not be null");
synchronized (buffers) {
sb.append("] holds ")
- .append(buffers.size())
- .append(" buffers. \n");
+ .append(buffers.size())
+ .append(" buffers. \n");
for (ArrowBuf buf : buffers.keySet()) {
buf.print(sb, indent + 2, verbosity);
sb.append('\n');
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java
index 7d4de18751ba9..64a4232d8aeb7 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java
@@ -141,10 +141,12 @@ public boolean release(int decrement) {
}
@Override
- public void retain() { }
+ public void retain() {
+ }
@Override
- public void retain(int increment) { }
+ public void retain(int increment) {
+ }
@Override
public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) {
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java
index 9579245ca7004..79d21fa040876 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java
@@ -32,7 +32,8 @@ public class ByteFunctionHelpers {
private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
- private ByteFunctionHelpers() {}
+ private ByteFunctionHelpers() {
+ }
/**
* Helper function to check for equality of bytes in two ArrowBufs.
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java
index ccca7b1e03093..707c5f1556062 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java
@@ -24,7 +24,8 @@
*/
public final class CommonUtil {
- private CommonUtil() { }
+ private CommonUtil() {
+ }
/**
* Rounds up the provided value to the nearest power of two.
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java
index db63bbd14ba5f..94a7873664216 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java
@@ -22,7 +22,8 @@
/** Contains utilities for dealing with a 64-bit address base. */
public final class LargeMemoryUtil {
- private LargeMemoryUtil() {}
+ private LargeMemoryUtil() {
+ }
/**
* Casts length to an int, but raises an exception the value is outside
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java
index 6b01a61ebca39..b88372abaaee1 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java
@@ -34,7 +34,8 @@
* Utility methods for manipulating {@link java.util.Collections} and their subclasses/implementations.
*/
public final class Collections2 {
- private Collections2() {}
+ private Collections2() {
+ }
/**
* Creates a {@link List} from the elements remaining in iterator.
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java
index 8083033007d9c..5e4323cfc9c61 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java
@@ -111,7 +111,8 @@
* @since 2.0
*/
public final class Preconditions {
- private Preconditions() {}
+ private Preconditions() {
+ }
/**
* Ensures the truth of an expression involving one or more parameters to the calling method.
diff --git a/java/pom.xml b/java/pom.xml
index 7871303634976..b2b300b2f3fed 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -304,7 +304,7 @@
com.puppycrawl.tools
checkstyle
- 8.19
+ 8.29
org.slf4j
diff --git a/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java b/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java
index bb7cedeb74579..3d9bca58a763c 100644
--- a/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java
+++ b/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java
@@ -34,7 +34,8 @@
* first argument and the output is written to standard out.
*/
public class FileToStream {
- private FileToStream() {}
+ private FileToStream() {
+ }
/**
* Reads an Arrow file from in and writes it back to out.
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java
index 6824756d8aca7..abece39475016 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java
@@ -22,7 +22,8 @@
/** Helper utility methods for allocating storage for Vectors. */
public class AllocationHelper {
- private AllocationHelper() {}
+ private AllocationHelper() {
+ }
/**
* Allocates the vector.
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
index 568554ba75ed6..10f343e260ccc 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
@@ -33,7 +33,8 @@
*/
public class BitVectorHelper {
- private BitVectorHelper() {}
+ private BitVectorHelper() {
+ }
/**
* Get the index of byte corresponding to bit index in validity buffer.
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java b/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java
index 6cda18a8a53d3..be501ce245410 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java
@@ -27,7 +27,8 @@
* with sample data. This class should be used for that purpose.
*/
public class GenerateSampleData {
- private GenerateSampleData() {}
+ private GenerateSampleData() {
+ }
/** Populates vector
with valueCount
random values. */
public static void generateTestData(final ValueVector vector, final int valueCount) {
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
index d7b147feb152f..3b734bbf6608b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
@@ -300,7 +300,8 @@ public int getNullCount() {
* @param index position of element
*/
@Override
- public void setNull(int index) {}
+ public void setNull(int index) {
+ }
@Override
public boolean isNull(int index) {
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java
index 0de99ab011f66..76db0734464ed 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java
@@ -41,7 +41,8 @@ public class Range {
/**
* Constructs a new instance.
*/
- public Range() {}
+ public Range() {
+ }
/**
* Constructs a new instance.
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java
index 0098f68360a1a..2cd64c4fc6766 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java
@@ -23,7 +23,8 @@
* Utility methods for state machines based on enums.
*/
public class StateTool {
- private StateTool() {}
+ private StateTool() {
+ }
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StateTool.class);
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java
index 9c399669affc3..b16315caa9f51 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java
@@ -25,7 +25,8 @@
* Magic header/footer helpers for {@link ArrowFileWriter} and {@link ArrowFileReader} formatted files.
*/
class ArrowMagic {
- private ArrowMagic(){}
+ private ArrowMagic(){
+ }
private static final byte[] MAGIC = "ARROW1".getBytes(StandardCharsets.UTF_8);
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java
index 26736ed91c5ca..59b3bb07bcf16 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java
@@ -31,7 +31,8 @@
* Utility methods for {@linkplain org.apache.arrow.vector.ipc.message.FBSerializable}s.
*/
public class FBSerializables {
- private FBSerializables() {}
+ private FBSerializables() {
+ }
/**
* Writes every element of all to builder and calls {@link FlatBufferBuilder#endVector()} afterwards.
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java
index 9e8b6d26f6fd7..f7f975a0d0e7b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java
@@ -26,7 +26,8 @@
/** Utility class for Date, DateTime, TimeStamp, Interval data types. */
public class DateUtility {
- private DateUtility() {}
+ private DateUtility() {
+ }
private static final String UTC = "UTC";
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
index 0dfb61dcdf269..4635822e5141b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
@@ -29,7 +29,8 @@
* Utility methods for configurable precision Decimal values (e.g. {@link BigDecimal}).
*/
public class DecimalUtility {
- private DecimalUtility() {}
+ private DecimalUtility() {
+ }
public static final byte [] zeroes = new byte[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java
index 9592f3975ab99..76fb585e6bd3a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java
@@ -35,7 +35,8 @@
* Utility methods for working with Dictionaries used in Dictionary encodings.
*/
public class DictionaryUtility {
- private DictionaryUtility() {}
+ private DictionaryUtility() {
+ }
/**
* Convert field and child fields that have a dictionary encoding to message format, so fields
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java
index 39488e96efda0..5fa4c1b2260e3 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java
@@ -26,7 +26,8 @@
*/
public final class ObjectMapperFactory {
- private ObjectMapperFactory() {}
+ private ObjectMapperFactory() {
+ }
/**
* Creates a new {@link ObjectMapper} instance.
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java
index f8167604c21ad..5b3d00f6b7362 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java
@@ -33,7 +33,8 @@
* Schema utility class including serialization and deserialization.
*/
public class SchemaUtility {
- private SchemaUtility() {}
+ private SchemaUtility() {
+ }
/**
* Deserialize Arrow schema from byte array.
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
index f9f0357861c15..9e96e75880522 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
@@ -75,7 +75,8 @@
*/
public class ValueVectorDataPopulator {
- private ValueVectorDataPopulator(){}
+ private ValueVectorDataPopulator() {
+ }
/**
* Populate values for BigIntVector.
From 0c88d13341dfaba5109683bda25ee3ffcd808080 Mon Sep 17 00:00:00 2001
From: mwish
Date: Tue, 6 Feb 2024 01:34:37 +0800
Subject: [PATCH 38/74] GH-39704: [C++][Parquet] Benchmark levels decoding
(#39705)
### Rationale for this change
This patch adds the level-decoding benchmark. It tests:
1. Different max-levels (for a flat type, the maximum level would be 1; for a nested type, it would grow)
2. With different repeat counts (repeated null / non-null data behaves differently from non-repeated data)
3. With different read-batch sizes. This part of the logic is a bit tricky in the original code
### What changes are included in this PR?
Add Level decoding benchmark
### Are these changes tested?
No need
### Are there any user-facing changes?
no
* Closes: #39704
Authored-by: mwish
Signed-off-by: Antoine Pitrou
---
cpp/src/parquet/column_reader_benchmark.cc | 98 ++++++++++++++++++++++
cpp/src/parquet/column_writer_test.cc | 4 +-
2 files changed, 100 insertions(+), 2 deletions(-)
diff --git a/cpp/src/parquet/column_reader_benchmark.cc b/cpp/src/parquet/column_reader_benchmark.cc
index 49b2317ede187..61fe397cf1c30 100644
--- a/cpp/src/parquet/column_reader_benchmark.cc
+++ b/cpp/src/parquet/column_reader_benchmark.cc
@@ -219,5 +219,103 @@ BENCHMARK(RecordReaderReadRecords)
->Args({2, 1000, true})
->Args({2, 1000, false});
+void GenerateLevels(int level_repeats, int max_level, int num_levels,
+ std::vector* levels) {
+ // Generate random levels
+ std::default_random_engine gen(/*seed=*/1943);
+ std::uniform_int_distribution d(0, max_level);
+ for (int i = 0; i < num_levels;) {
+ int16_t current_level = d(gen); // level repeat `level_repeats` times
+ const int current_repeated = std::min(level_repeats, num_levels - i);
+ levels->insert(levels->end(), current_repeated, current_level);
+ i += current_repeated;
+ }
+}
+
+void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels,
+ const int16_t* input_levels, std::vector* bytes) {
+ LevelEncoder encoder;
+ // encode levels
+ if (encoding == Encoding::RLE) {
+ int rle_size = LevelEncoder::MaxBufferSize(encoding, max_level, num_levels);
+ bytes->resize(rle_size + sizeof(int32_t));
+ // leave space to write the rle length value
+ encoder.Init(encoding, max_level, num_levels, bytes->data() + sizeof(int32_t),
+ rle_size);
+ encoder.Encode(num_levels, input_levels);
+ int data_length = encoder.len();
+ memcpy(bytes->data(), &data_length, sizeof(int32_t));
+ } else {
+ int bitpack_size =
+ LevelEncoder::MaxBufferSize(encoding, max_level, num_levels) + sizeof(int32_t);
+ bytes->resize(bitpack_size);
+ encoder.Init(encoding, max_level, num_levels, bytes->data(),
+ static_cast(bytes->size()));
+ encoder.Encode(num_levels, input_levels);
+ }
+}
+
+static void DecodeLevels(Encoding::type level_encoding, int16_t max_level, int num_levels,
+ int batch_size, int level_repeat_count,
+ ::benchmark::State& state) {
+ std::vector bytes;
+ {
+ std::vector input_levels;
+ GenerateLevels(/*level_repeats=*/level_repeat_count, /*max_repeat_factor=*/max_level,
+ num_levels, &input_levels);
+ EncodeLevels(level_encoding, max_level, num_levels, input_levels.data(), &bytes);
+ }
+
+ LevelDecoder decoder;
+ std::vector output_levels(batch_size);
+ for (auto _ : state) {
+ state.PauseTiming();
+ decoder.SetData(level_encoding, max_level, num_levels, bytes.data(),
+ static_cast(bytes.size()));
+ state.ResumeTiming();
+ // Decode multiple times with batch_size
+ while (true) {
+ int levels_decoded = decoder.Decode(batch_size, output_levels.data());
+ if (levels_decoded == 0) {
+ break;
+ }
+ }
+ }
+ state.SetBytesProcessed(state.iterations() * num_levels * sizeof(int16_t));
+ state.SetItemsProcessed(state.iterations() * num_levels);
+}
+
+static void ReadLevels_Rle(::benchmark::State& state) {
+ int16_t max_level = static_cast(state.range(0));
+ int num_levels = static_cast(state.range(1));
+ int batch_size = static_cast(state.range(2));
+ int level_repeat_count = static_cast(state.range(3));
+ DecodeLevels(Encoding::RLE, max_level, num_levels, batch_size, level_repeat_count,
+ state);
+}
+
+static void ReadLevels_BitPack(::benchmark::State& state) {
+ int16_t max_level = static_cast(state.range(0));
+ int num_levels = static_cast(state.range(1));
+ int batch_size = static_cast(state.range(2));
+ int level_repeat_count = static_cast(state.range(3));
+ DecodeLevels(Encoding::BIT_PACKED, max_level, num_levels, batch_size,
+ level_repeat_count, state);
+}
+
+static void ReadLevelsArguments(::benchmark::internal::Benchmark* b) {
+ b->ArgNames({"MaxLevel", "NumLevels", "BatchSize", "LevelRepeatCount"})
+ ->Args({1, 8096, 1024, 1})
+ ->Args({1, 8096, 1024, 7})
+ ->Args({1, 8096, 1024, 1024})
+ ->Args({1, 8096, 2048, 1})
+ ->Args({3, 8096, 1024, 1})
+ ->Args({3, 8096, 2048, 1})
+ ->Args({3, 8096, 1024, 7});
+}
+
+BENCHMARK(ReadLevels_Rle)->Apply(ReadLevelsArguments);
+BENCHMARK(ReadLevels_BitPack)->Apply(ReadLevelsArguments);
+
} // namespace benchmark
} // namespace parquet
diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc
index 97421629d2ca6..a40e71ce30aec 100644
--- a/cpp/src/parquet/column_writer_test.cc
+++ b/cpp/src/parquet/column_writer_test.cc
@@ -1021,7 +1021,7 @@ void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels,
}
void VerifyDecodingLevels(Encoding::type encoding, int16_t max_level,
- std::vector& input_levels,
+ const std::vector& input_levels,
std::vector& bytes) {
LevelDecoder decoder;
int levels_count = 0;
@@ -1060,7 +1060,7 @@ void VerifyDecodingLevels(Encoding::type encoding, int16_t max_level,
}
void VerifyDecodingMultipleSetData(Encoding::type encoding, int16_t max_level,
- std::vector& input_levels,
+ const std::vector& input_levels,
std::vector>& bytes) {
LevelDecoder decoder;
int levels_count = 0;
From de53aac762fc703148f5822ed170b462a6b467d8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Feb 2024 10:01:02 -0800
Subject: [PATCH 39/74] MINOR: [C#] Bump Grpc.Tools from 2.60.0 to 2.61.0 in
/csharp (#39945)
Bumps [Grpc.Tools](https://github.com/grpc/grpc) from 2.60.0 to 2.61.0.
Commits
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Grpc.Tools&package-manager=nuget&previous-version=2.60.0&new-version=2.61.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher
---
csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
index 68c3e47e01902..3a6ae28b390d2 100644
--- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
+++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
@@ -7,7 +7,7 @@
-
+
From 26801f147a9e98bb6c5bc4e7131bdf1bc2794467 Mon Sep 17 00:00:00 2001
From: Matt Topol
Date: Mon, 5 Feb 2024 15:29:06 -0500
Subject: [PATCH 40/74] GH-39769: [C++][Device] Fix Importing nested and string
types for DeviceArray (#39770)
### Rationale for this change
In my testing with libcudf and other GPU data, I discovered a deficiency in ImportDeviceArray and thus ImportDeviceRecordBatch where the device type and memory manager aren't propagated to child importers and it fails to import offset-based types such as strings.
### What changes are included in this PR?
These are relatively easily handled by first ensuring that `ImportChild` propagates the device_type and memory manager from the parent. Then for importing offset based values we merely need to use the memory manager to copy the final offset value to the CPU to use for the buffer size computation.
This will work for any device which has implemented CopyBufferTo/From
### Are these changes tested?
A new test is added to test these situations.
* Closes: #39769
Authored-by: Matt Topol
Signed-off-by: Matt Topol
---
cpp/src/arrow/c/bridge.cc | 23 ++++++++++++++++++++---
cpp/src/arrow/c/bridge_test.cc | 10 ++++++++++
cpp/src/arrow/device.cc | 14 ++++++++++++++
3 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 9b165a10a61e7..119249da99a6d 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -1543,6 +1543,8 @@ struct ArrayImporter {
if (recursion_level_ >= kMaxImportRecursionLevel) {
return Status::Invalid("Recursion level in ArrowArray struct exceeded");
}
+ device_type_ = parent->device_type_;
+ memory_mgr_ = parent->memory_mgr_;
// Child buffers will keep the entire parent import alive.
// Perhaps we can move the child structs to an owned area
// when the parent ImportedArrayData::Release() gets called,
@@ -1857,10 +1859,25 @@ struct ArrayImporter {
template
Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id,
int64_t byte_width = 1) {
- auto offsets = data_->GetValues(offsets_buffer_id);
+ if (device_type_ == DeviceAllocationType::kCPU) {
+ auto offsets = data_->GetValues(offsets_buffer_id);
+ // Compute visible size of buffer
+ int64_t buffer_size =
+ (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0;
+ return ImportBuffer(buffer_id, buffer_size);
+ }
+
+ // we only need the value of the last offset so let's just copy that
+ // one value from device to host.
+ auto single_value_buf =
+ SliceBuffer(data_->buffers[offsets_buffer_id],
+ c_struct_->length * sizeof(OffsetType), sizeof(OffsetType));
+ ARROW_ASSIGN_OR_RAISE(
+ auto cpubuf, Buffer::ViewOrCopy(single_value_buf, default_cpu_memory_manager()));
+ auto offsets = cpubuf->data_as();
// Compute visible size of buffer
- int64_t buffer_size =
- (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0;
+ int64_t buffer_size = (c_struct_->length > 0) ? byte_width * offsets[0] : 0;
+
return ImportBuffer(buffer_id, buffer_size);
}
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 8b67027454c55..b8d5e0fcd3845 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -4320,6 +4320,16 @@ TEST_F(TestDeviceArrayRoundtrip, Primitive) {
TestWithJSON(mm, int32(), "[4, 5, null]");
}
+TEST_F(TestDeviceArrayRoundtrip, Struct) {
+ std::shared_ptr device = std::make_shared(1);
+ auto mm = device->default_memory_manager();
+ auto type = struct_({field("ints", int16()), field("strs", utf8())});
+
+ TestWithJSON(mm, type, "[]");
+ TestWithJSON(mm, type, R"([[4, "foo"], [5, "bar"]])");
+ TestWithJSON(mm, type, R"([[4, null], null, [5, "foo"]])");
+}
+
////////////////////////////////////////////////////////////////////////////
// Array stream export tests
diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc
index 616f89aae896f..3736a4e018c33 100644
--- a/cpp/src/arrow/device.cc
+++ b/cpp/src/arrow/device.cc
@@ -195,6 +195,13 @@ Result> CPUMemoryManager::ViewBufferFrom(
if (!from->is_cpu()) {
return nullptr;
}
+ // in this case the memory manager we're coming from is visible on the CPU,
+ // but uses an allocation type other than CPU. Since we know the data is visible
+ // to the CPU a "View" of this should use the CPUMemoryManager as the listed memory
+ // manager.
+ if (buf->device_type() != DeviceAllocationType::kCPU) {
+ return std::make_shared(buf->address(), buf->size(), shared_from_this(), buf);
+ }
return buf;
}
@@ -220,6 +227,13 @@ Result> CPUMemoryManager::ViewBufferTo(
if (!to->is_cpu()) {
return nullptr;
}
+ // in this case the memory manager we're coming from is visible on the CPU,
+ // but uses an allocation type other than CPU. Since we know the data is visible
+ // to the CPU a "View" of this should use the CPUMemoryManager as the listed memory
+ // manager.
+ if (buf->device_type() != DeviceAllocationType::kCPU) {
+ return std::make_shared(buf->address(), buf->size(), to, buf);
+ }
return buf;
}
From fd69d307447888101600376fa3016b727a3e0106 Mon Sep 17 00:00:00 2001
From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com>
Date: Tue, 6 Feb 2024 06:11:36 +0800
Subject: [PATCH 41/74] GH-39860: [C++] Expression ExecuteScalarExpression
execute empty args function with a wrong result (#39908)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
### Rationale for this change
Try to fix #39860.
### What changes are included in this PR?
Deal with the call->arguments.size() == 0 condition in ExecuteScalarExpression, i.e. when we call
functions that have no arguments (like random, hash_count, ...).
### Are these changes tested?
Yes
### Are there any user-facing changes?
No.
* Closes: #39860
Lead-authored-by: hugo.zhang
Co-authored-by: 张回归
Signed-off-by: Benjamin Kietzman
---
cpp/src/arrow/compute/expression.cc | 13 +++++++++++--
cpp/src/arrow/compute/expression_test.cc | 19 +++++++++++++++++++
2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/compute/expression.cc b/cpp/src/arrow/compute/expression.cc
index b47e0a35525c5..8c59ad1df86f2 100644
--- a/cpp/src/arrow/compute/expression.cc
+++ b/cpp/src/arrow/compute/expression.cc
@@ -761,6 +761,15 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i
}
}
+ int64_t input_length;
+ if (!arguments.empty() && all_scalar) {
+ // all inputs are scalar, so use a 1-long batch to avoid
+ // computing input.length equivalent outputs
+ input_length = 1;
+ } else {
+ input_length = input.length;
+ }
+
auto executor = compute::detail::KernelExecutor::MakeScalar();
compute::KernelContext kernel_context(exec_context, call->kernel);
@@ -772,8 +781,8 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i
RETURN_NOT_OK(executor->Init(&kernel_context, {kernel, types, options}));
compute::detail::DatumAccumulator listener;
- RETURN_NOT_OK(executor->Execute(
- ExecBatch(std::move(arguments), all_scalar ? 1 : input.length), &listener));
+ RETURN_NOT_OK(
+ executor->Execute(ExecBatch(std::move(arguments), input_length), &listener));
const auto out = executor->WrapResults(arguments, listener.values());
#ifndef NDEBUG
DCHECK_OK(executor->CheckResultType(out, call->function_name.c_str()));
diff --git a/cpp/src/arrow/compute/expression_test.cc b/cpp/src/arrow/compute/expression_test.cc
index 44159e76600fb..d33c348cd77da 100644
--- a/cpp/src/arrow/compute/expression_test.cc
+++ b/cpp/src/arrow/compute/expression_test.cc
@@ -863,6 +863,25 @@ TEST(Expression, ExecuteCall) {
])"));
}
+TEST(Expression, ExecuteCallWithNoArguments) {
+ const int kCount = 10;
+ auto random_options = RandomOptions::FromSeed(/*seed=*/0);
+ ExecBatch input({}, kCount);
+
+ Expression random_expr = call("random", {}, random_options);
+ ASSERT_OK_AND_ASSIGN(random_expr, random_expr.Bind(float64()));
+
+ ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(random_expr, input));
+ compute::ExecContext* exec_context = default_exec_context();
+ ASSERT_OK_AND_ASSIGN(auto function,
+ exec_context->func_registry()->GetFunction("random"));
+ ASSERT_OK_AND_ASSIGN(Datum expected,
+ function->Execute(input, &random_options, exec_context));
+ AssertDatumsEqual(actual, expected, /*verbose=*/true);
+
+ EXPECT_EQ(actual.length(), kCount);
+}
+
TEST(Expression, ExecuteDictionaryTransparent) {
ExpectExecute(
equal(field_ref("a"), field_ref("b")),
From 0415a60eebdaf8130ca3028a802529ecfb738493 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Tue, 6 Feb 2024 08:44:29 +0900
Subject: [PATCH 42/74] GH-39883: [CI][R][Windows] Use
ci/scripts/install_minio.sh with Git bash (#39929)
### Rationale for this change
`curl` in Rtools can't be used in a non-Rtools MSYS2 environment, because `curl` in Rtools can't refer to `/usr/ssl/certs/ca-bundle.crt` in a non-Rtools MSYS2 environment.
### What changes are included in this PR?
Use the `bash` in GitHub Actions Runner. `curl` in the environment works.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* Closes: #39883
Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
.github/workflows/r.yml | 41 ++++++++++++-----------
ci/scripts/install_minio.sh | 67 +++++++++++++++++++++++++------------
2 files changed, 67 insertions(+), 41 deletions(-)
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 2a801b6040ec8..3d1f75ede4bb5 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -21,24 +21,26 @@ on:
push:
paths:
- ".github/workflows/r.yml"
- - "ci/scripts/r_*.sh"
- - "ci/scripts/cpp_*.sh"
- - "ci/scripts/PKGBUILD"
- - "ci/etc/rprofile"
- "ci/docker/**"
+ - "ci/etc/rprofile"
+ - "ci/scripts/PKGBUILD"
+ - "ci/scripts/cpp_*.sh"
+ - "ci/scripts/install_minio.sh"
+ - "ci/scripts/r_*.sh"
- "cpp/**"
- - 'docker-compose.yml'
+ - "docker-compose.yml"
- "r/**"
pull_request:
paths:
- ".github/workflows/r.yml"
- - "ci/scripts/r_*.sh"
- - "ci/scripts/cpp_*.sh"
- - "ci/scripts/PKGBUILD"
- - "ci/etc/rprofile"
- "ci/docker/**"
+ - "ci/etc/rprofile"
+ - "ci/scripts/PKGBUILD"
+ - "ci/scripts/cpp_*.sh"
+ - "ci/scripts/install_minio.sh"
+ - "ci/scripts/r_*.sh"
- "cpp/**"
- - 'docker-compose.yml'
+ - "docker-compose.yml"
- "r/**"
concurrency:
@@ -256,6 +258,16 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
+ # This must be done before r-lib/actions/setup-r because curl in
+ # Rtools doesn't work on non Rtools' MSYS2 environment. If we
+ # use "shell: bash" after r-lib/actions/setup-r, bash in Rtools
+ # is used on non Rtools' MSYS2 environment.
+ - name: Install MinIO
+ shell: bash
+ run: |
+ mkdir -p "$HOME/.local/bin"
+ ci/scripts/install_minio.sh latest "$HOME/.local"
+ echo "$HOME/.local/bin" >> $GITHUB_PATH
- run: mkdir r/windows
- name: Download artifacts
uses: actions/download-artifact@v3
@@ -282,15 +294,6 @@ jobs:
working-directory: 'r'
extra-packages: |
any::rcmdcheck
- - name: Install MinIO
- shell: bash
- run: |
- mkdir -p "$HOME/.local/bin"
- curl \
- --output "$HOME/.local/bin/minio.exe" \
- https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z
- chmod +x "$HOME/.local/bin/minio.exe"
- echo "$HOME/.local/bin" >> $GITHUB_PATH
# TODO(ARROW-17149): figure out why the GCS tests are hanging on Windows
# - name: Install Google Cloud Storage Testbench
# shell: bash
diff --git a/ci/scripts/install_minio.sh b/ci/scripts/install_minio.sh
index 6ea8e1a095c39..e493a183b4543 100755
--- a/ci/scripts/install_minio.sh
+++ b/ci/scripts/install_minio.sh
@@ -17,7 +17,15 @@
# specific language governing permissions and limitations
# under the License.
-set -e
+set -eu
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 "
+ exit 1
+fi
+
+version=$1
+prefix=$2
declare -A archs
archs=([x86_64]=amd64
@@ -25,45 +33,60 @@ archs=([x86_64]=amd64
[aarch64]=arm64
[s390x]=s390x)
-declare -A platforms
-platforms=([Linux]=linux
- [Darwin]=darwin)
-
arch=$(uname -m)
-platform=$(uname)
-version=$1
-prefix=$2
-
-if [ "$#" -ne 2 ]; then
- echo "Usage: $0 "
- exit 1
-elif [ -z ${archs[$arch]} ]; then
+if [ -z ${archs[$arch]} ]; then
echo "Unsupported architecture: ${arch}"
exit 0
-elif [ -z ${platforms[$platform]} ]; then
- echo "Unsupported platform: ${platform}"
- exit 0
-elif [ "${version}" != "latest" ]; then
+fi
+arch=${archs[$arch]}
+
+platform=$(uname)
+case ${platform} in
+ Linux)
+ platform=linux
+ ;;
+ Darwin)
+ platform=darwin
+ ;;
+ MSYS_NT*|MINGW64_NT*)
+ platform=windows
+ ;;
+ *)
+ echo "Unsupported platform: ${platform}"
+ exit 0
+ ;;
+esac
+
+if [ "${version}" != "latest" ]; then
echo "Cannot fetch specific versions of minio, only latest is supported."
exit 1
fi
-arch=${archs[$arch]}
-platform=${platforms[$platform]}
-
# Use specific versions for minio server and client to avoid CI failures on new releases.
minio_version="minio.RELEASE.2022-05-26T05-48-41Z"
mc_version="mc.RELEASE.2022-05-09T04-08-26Z"
+download()
+{
+ local output=$1
+ local url=$2
+
+ if type wget > /dev/null 2>&1; then
+ wget -nv --output-document ${output} ${url}
+ else
+ curl --fail --location --output ${output} ${url}
+ fi
+}
+
if [[ ! -x ${prefix}/bin/minio ]]; then
url="https://dl.min.io/server/minio/release/${platform}-${arch}/archive/${minio_version}"
echo "Fetching ${url}..."
- wget -nv --output-document ${prefix}/bin/minio ${url}
+ download ${prefix}/bin/minio ${url}
chmod +x ${prefix}/bin/minio
fi
if [[ ! -x ${prefix}/bin/mc ]]; then
url="https://dl.min.io/client/mc/release/${platform}-${arch}/archive/${mc_version}"
echo "Fetching ${url}..."
- wget -nv --output-document ${prefix}/bin/mc ${url}
+ download ${prefix}/bin/mc ${url}
chmod +x ${prefix}/bin/mc
fi
From 9db823b45fd4ae455c531e944681c898bede7d53 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Tue, 6 Feb 2024 08:50:58 +0900
Subject: [PATCH 43/74] GH-39930: [C++] Use Requires instead of Libs for system
RE2 in arrow.pc (#39932)
### Rationale for this change
We chose Libs{,.private} with libre2.a for system RE2 in GH-10626. Because "Require{,.private} re2" may add "-std=c++11". If "-std=c++11" was added, users can't build Apache Arrow C++ because Apache Arrow C++ requires C++17 or later.
But this approach doesn't work with RE2 2024-06-01 or later because it at least requires Abseil. If we keep the Libs{,.private} approach, we also need to add Abseil libraries to Libs{,.private}. But it's unmaintainable.
### What changes are included in this PR?
Let's use "Requires{,.private} re2" instead of Libs{,.private}. I hope recent re2.pc doesn't add "-std=c++11".
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* Closes: #39930
Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
cpp/cmake_modules/ThirdpartyToolchain.cmake | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 6bb9c0f6af2ca..0238c26c0fb51 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2594,16 +2594,11 @@ macro(build_re2)
endmacro()
if(ARROW_WITH_RE2)
- # Don't specify "PC_PACKAGE_NAMES re2" here because re2.pc may
- # include -std=c++11. It's not compatible with C source and C++
- # source not uses C++ 11.
- resolve_dependency(re2 HAVE_ALT TRUE)
- if(${re2_SOURCE} STREQUAL "SYSTEM" AND ARROW_BUILD_STATIC)
- get_target_property(RE2_TYPE re2::re2 TYPE)
- if(NOT RE2_TYPE STREQUAL "INTERFACE_LIBRARY")
- string(APPEND ARROW_PC_LIBS_PRIVATE " $")
- endif()
- endif()
+ resolve_dependency(re2
+ HAVE_ALT
+ TRUE
+ PC_PACKAGE_NAMES
+ re2)
add_definitions(-DARROW_WITH_RE2)
endif()
From 0896d5b86510b9d410fd849610e2e1dedc77bf03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?=
Date: Tue, 6 Feb 2024 01:41:27 +0100
Subject: [PATCH 44/74] GH-39943: [CI][Python] Update manylinux images to avoid
GPG problems downloading packages (#39944)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
### Rationale for this change
Old manylinux images seem to have issues with a GPG key in order to download packages.
### What changes are included in this PR?
Update the manylinux image used for the latest one.
### Are these changes tested?
Via archery jobs
### Are there any user-facing changes?
No
* Closes: #39943
Authored-by: Raúl Cumplido
Signed-off-by: Sutou Kouhei
---
docker-compose.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docker-compose.yml b/docker-compose.yml
index 0252c4ec8a896..5c84d24fd7df7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1030,7 +1030,7 @@ services:
args:
arch: ${ARCH}
arch_short: ${ARCH_SHORT}
- base: quay.io/pypa/manylinux2014_${ARCH_ALIAS}:2023-10-03-72cdc42
+ base: quay.io/pypa/manylinux2014_${ARCH_ALIAS}:2024-02-04-ea37246
vcpkg: ${VCPKG}
python: ${PYTHON}
manylinux: 2014
@@ -1053,7 +1053,7 @@ services:
args:
arch: ${ARCH}
arch_short: ${ARCH_SHORT}
- base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2023-10-03-72cdc42
+ base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2024-02-04-ea37246
vcpkg: ${VCPKG}
python: ${PYTHON}
manylinux: 2_28
From 15525102992fbe83e6ce0943fe09e3f23a1287f4 Mon Sep 17 00:00:00 2001
From: Thomas Newton
Date: Tue, 6 Feb 2024 00:48:57 +0000
Subject: [PATCH 45/74] GH-39621: [CI][Packaging] Update vcpkg to 2023.11.20
release (#39622)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
### Rationale for this change
Old version of vcpkg is blocking https://github.com/apache/arrow/issues/39352
### What changes are included in this PR?
- Upgrade vcpkg
- Fix ports patches
- Upgrade visual studio used in windows wheel builds. VS2019 is now required for the vcpkg `abseil` build.
- Move `VCPKG_FORCE_SYSTEM_BINARIES` to be set before vcpkg install to fix vcpkg install on linux ARM.
- Fix for LLVM 17 which requires that an executable exports "llvm_orc_registerEHFrameSectionWrapper()" and "llvm_orc_unregisterEHFrameSectionWrapper()". This effects the java builds that depend on llvm from vcpkg for gandiva.
- Update image used for python wheel builds on windows to 2024-02-05
### Are these changes tested?
Does not change any behaviour so should be covered by existing tests
### Are there any user-facing changes?
There shouldn't be
* Closes: #39621
Lead-authored-by: Thomas Newton
Co-authored-by: Sutou Kouhei
Co-authored-by: Raúl Cumplido
Signed-off-by: Sutou Kouhei
---
.env | 6 ++---
ci/docker/python-wheel-manylinux.dockerfile | 7 ++---
...thon-wheel-windows-test-vs2019.dockerfile} | 4 +--
...=> python-wheel-windows-vs2019.dockerfile} | 4 +--
ci/scripts/python_wheel_windows_build.bat | 8 +++---
ci/vcpkg/ports.patch | 27 ++++++++++---------
ci/vcpkg/vcpkg.json | 5 +++-
cpp/src/gandiva/CMakeLists.txt | 9 +++++++
dev/tasks/python-wheels/github.windows.yml | 12 ++++-----
docker-compose.yml | 16 +++++------
10 files changed, 57 insertions(+), 41 deletions(-)
rename ci/docker/{python-wheel-windows-test-vs2017.dockerfile => python-wheel-windows-test-vs2019.dockerfile} (96%)
rename ci/docker/{python-wheel-windows-vs2017.dockerfile => python-wheel-windows-vs2019.dockerfile} (98%)
diff --git a/.env b/.env
index 427a4ab0bf398..eb87dc62bdd8c 100644
--- a/.env
+++ b/.env
@@ -92,13 +92,13 @@ DEVTOOLSET_VERSION=
# Used through docker-compose.yml and serves as the default version for the
# ci/scripts/install_vcpkg.sh script. Prefer to use short SHAs to keep the
# docker tags more readable.
-VCPKG="501db0f17ef6df184fcdbfbe0f87cde2313b6ab1" # 2023.04.15 Release
+VCPKG="a42af01b72c28a8e1d7b48107b33e4f286a55ef6" # 2023.11.20 Release
# This must be updated when we update
-# ci/docker/python-wheel-windows-vs2017.dockerfile.
+# ci/docker/python-wheel-windows-vs2019.dockerfile.
# This is a workaround for our CI problem that "archery docker build" doesn't
# use pulled built images in dev/tasks/python-wheels/github.windows.yml.
-PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2023-08-02
+PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-02-05
# Use conanio/${CONAN} for "docker-compose run --rm conan". See
# https://github.com/conan-io/conan-docker-tools#readme for available
diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile
index a07c727ac76fa..2831440d5a967 100644
--- a/ci/docker/python-wheel-manylinux.dockerfile
+++ b/ci/docker/python-wheel-manylinux.dockerfile
@@ -62,15 +62,16 @@ COPY ci/vcpkg/*.patch \
COPY ci/scripts/install_vcpkg.sh \
arrow/ci/scripts/
ENV VCPKG_ROOT=/opt/vcpkg
-RUN arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg}
-ENV PATH="${PATH}:${VCPKG_ROOT}"
-
ARG build_type=release
ENV CMAKE_BUILD_TYPE=${build_type} \
VCPKG_FORCE_SYSTEM_BINARIES=1 \
VCPKG_OVERLAY_TRIPLETS=/arrow/ci/vcpkg \
VCPKG_DEFAULT_TRIPLET=${arch_short}-linux-static-${build_type} \
VCPKG_FEATURE_FLAGS="manifests"
+
+RUN arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg}
+ENV PATH="${PATH}:${VCPKG_ROOT}"
+
COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/
# cannot use the S3 feature here because while aws-sdk-cpp=1.9.160 contains
# ssl related fixes as well as we can patch the vcpkg portfile to support
diff --git a/ci/docker/python-wheel-windows-test-vs2017.dockerfile b/ci/docker/python-wheel-windows-test-vs2019.dockerfile
similarity index 96%
rename from ci/docker/python-wheel-windows-test-vs2017.dockerfile
rename to ci/docker/python-wheel-windows-test-vs2019.dockerfile
index e842ede18454b..67d99fa9c5724 100644
--- a/ci/docker/python-wheel-windows-test-vs2017.dockerfile
+++ b/ci/docker/python-wheel-windows-test-vs2019.dockerfile
@@ -19,8 +19,8 @@
# when you update this file.
# based on mcr.microsoft.com/windows/servercore:ltsc2019
-# contains choco and vs2017 preinstalled
-FROM abrarov/msvc-2017:2.11.0
+# contains choco and vs2019 preinstalled
+FROM abrarov/msvc-2019:2.11.0
# Add unix tools to path
RUN setx path "%path%;C:\Program Files\Git\usr\bin"
diff --git a/ci/docker/python-wheel-windows-vs2017.dockerfile b/ci/docker/python-wheel-windows-vs2019.dockerfile
similarity index 98%
rename from ci/docker/python-wheel-windows-vs2017.dockerfile
rename to ci/docker/python-wheel-windows-vs2019.dockerfile
index 067105b3a7995..b8e8aad952b1c 100644
--- a/ci/docker/python-wheel-windows-vs2017.dockerfile
+++ b/ci/docker/python-wheel-windows-vs2019.dockerfile
@@ -19,8 +19,8 @@
# when you update this file.
# based on mcr.microsoft.com/windows/servercore:ltsc2019
-# contains choco and vs2017 preinstalled
-FROM abrarov/msvc-2017:2.11.0
+# contains choco and vs2019 preinstalled
+FROM abrarov/msvc-2019:2.11.0
# Install CMake and Ninja
ARG cmake=3.21.4
diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat
index ffb43b3481e55..73b0192d9bc97 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -19,7 +19,7 @@
echo "Building windows wheel..."
-call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
+call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ==="
del /s /q C:\arrow-build
@@ -50,7 +50,8 @@ set ARROW_WITH_SNAPPY=ON
set ARROW_WITH_ZLIB=ON
set ARROW_WITH_ZSTD=ON
set CMAKE_UNITY_BUILD=ON
-set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
+set CMAKE_GENERATOR=Visual Studio 16 2019
+set CMAKE_PLATFORM=x64
set VCPKG_ROOT=C:\vcpkg
set VCPKG_FEATURE_FLAGS=-manifests
set VCGPK_TARGET_TRIPLET=amd64-windows-static-md-%CMAKE_BUILD_TYPE%
@@ -96,6 +97,7 @@ cmake ^
-DVCPKG_MANIFEST_MODE=OFF ^
-DVCPKG_TARGET_TRIPLET=%VCGPK_TARGET_TRIPLET% ^
-G "%CMAKE_GENERATOR%" ^
+ -A "%CMAKE_PLATFORM%" ^
C:\arrow\cpp || exit /B 1
cmake --build . --config %CMAKE_BUILD_TYPE% --target install || exit /B 1
popd
@@ -121,6 +123,6 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist
pushd C:\arrow\python
@REM bundle the msvc runtime
-cp "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Redist\MSVC\14.16.27012\x64\Microsoft.VC141.CRT\msvcp140.dll" pyarrow\
+cp "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Redist\MSVC\14.28.29325\x64\Microsoft.VC142.CRT\msvcp140.dll" pyarrow\
python setup.py bdist_wheel || exit /B 1
popd
diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch
index 68f6cae5addc9..0d4fb540a2003 100644
--- a/ci/vcpkg/ports.patch
+++ b/ci/vcpkg/ports.patch
@@ -1,13 +1,14 @@
diff --git a/ports/curl/portfile.cmake b/ports/curl/portfile.cmake
-index 5a14562..924b1b7 100644
+index bdc544e9e..53f6bbc3b 100644
--- a/ports/curl/portfile.cmake
+++ b/ports/curl/portfile.cmake
-@@ -87,8 +87,11 @@ vcpkg_cmake_configure(
+@@ -74,9 +74,12 @@ vcpkg_cmake_configure(
-DENABLE_MANUAL=OFF
-DCURL_CA_FALLBACK=ON
-DCURL_USE_LIBPSL=OFF
+ -DCURL_CA_PATH=none
+ -DCURL_CA_BUNDLE=none
+ -DCMAKE_DISABLE_FIND_PACKAGE_Perl=ON
OPTIONS_DEBUG
-DENABLE_DEBUG=ON
+ ${EXTRA_ARGS_DEBUG}
@@ -15,29 +16,29 @@ index 5a14562..924b1b7 100644
vcpkg_cmake_install()
vcpkg_copy_pdbs()
diff --git a/ports/snappy/portfile.cmake b/ports/snappy/portfile.cmake
-index 8f3f3f9..745b0fb 100644
+index 0c7098082..c603c3653 100644
--- a/ports/snappy/portfile.cmake
+++ b/ports/snappy/portfile.cmake
-@@ -9,6 +9,7 @@ vcpkg_from_github(
- HEAD_REF master
+@@ -10,6 +10,7 @@ vcpkg_from_github(
PATCHES
fix_clang-cl_build.patch
+ no-werror.patch
+ "snappy-disable-bmi.patch"
)
vcpkg_cmake_configure(
diff --git a/ports/snappy/snappy-disable-bmi.patch b/ports/snappy/snappy-disable-bmi.patch
new file mode 100644
-index 0000000..a57ce0c
+index 000000000..e839c93a4
--- /dev/null
+++ b/ports/snappy/snappy-disable-bmi.patch
@@ -0,0 +1,19 @@
+diff --git a/snappy.cc b/snappy.cc
-+index 79dc0e8..f3153ea 100644
++index d414718..7b49d2a 100644
+--- a/snappy.cc
++++ b/snappy.cc
-+@@ -965,14 +965,10 @@ static inline void Report(const char *algorithm, size_t compressed_size,
-+ static inline uint32_t ExtractLowBytes(uint32_t v, int n) {
++@@ -1014,14 +1014,10 @@ static inline void Report(const char *algorithm, size_t compressed_size,
++ static inline uint32_t ExtractLowBytes(const uint32_t& v, int n) {
+ assert(n >= 0);
+ assert(n <= 4);
+-#if SNAPPY_HAVE_BMI2
@@ -52,13 +53,13 @@ index 0000000..a57ce0c
+
+ static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) {
diff --git a/ports/llvm/portfile.cmake b/ports/llvm/portfile.cmake
-index 4d7e26a..1f054a2 100644
+index bf9397b66..c3112b673 100644
--- a/ports/llvm/portfile.cmake
+++ b/ports/llvm/portfile.cmake
-@@ -274,6 +274,8 @@ vcpkg_cmake_configure(
+@@ -293,6 +293,8 @@ vcpkg_cmake_configure(
+ ${FEATURE_OPTIONS}
+ MAYBE_UNUSED_VARIABLES
COMPILER_RT_ENABLE_IOS
- OPENMP_TOOLS_INSTALL_DIR
- MLIR_TOOLS_INSTALL_DIR
+ BOLT_TOOLS_INSTALL_DIR
+ LIBOMP_INSTALL_ALIASES
)
diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json
index 71c23165e61f0..99771728ecf18 100644
--- a/ci/vcpkg/vcpkg.json
+++ b/ci/vcpkg/vcpkg.json
@@ -81,8 +81,11 @@
"default-features": false,
"features": [
"clang",
- "default-options",
"default-targets",
+ "enable-bindings",
+ "enable-terminfo",
+ "enable-zlib",
+ "enable-zstd",
"enable-rtti",
"lld",
"tools"
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 3f038f54a7b27..d773fb5ff5895 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -229,6 +229,15 @@ function(ADD_GANDIVA_TEST REL_TEST_NAME)
set(TEST_NAME gandiva-${REL_TEST_NAME})
string(REPLACE "_" "-" TEST_NAME ${TEST_NAME})
+
+ if(ARG_USE_STATIC_LINKING OR ARROW_TEST_LINKAGE STREQUAL "static")
+ # LLVM 17 or later requires that an executable exports
+ # "llvm_orc_registerEHFrameSectionWrapper()" and
+ # "llvm_orc_unregisterEHFrameSectionWrapper()". We need to do
+ # nothing when we use libLLVM.so. But we need to export symbols
+ # explicitly when we use libLLVM*.a.
+ set_target_properties(${TEST_NAME} PROPERTIES ENABLE_EXPORTS TRUE)
+ endif()
endfunction()
add_gandiva_test(internals-test
diff --git a/dev/tasks/python-wheels/github.windows.yml b/dev/tasks/python-wheels/github.windows.yml
index 1641796a719e2..01f4977a9b0b1 100644
--- a/dev/tasks/python-wheels/github.windows.yml
+++ b/dev/tasks/python-wheels/github.windows.yml
@@ -29,7 +29,7 @@ jobs:
# this is a private repository at the moment (mostly because of licensing
# consideration of windows images with visual studio), but anyone can
# recreate the image by manually building it via:
- # `archery build python-wheel-windows-vs2017`
+ # `archery build python-wheel-windows-vs2019`
# note that we don't run docker build since there wouldn't be a cache hit
# and rebuilding the dependencies takes a fair amount of time
REPO: ghcr.io/ursacomputing/arrow
@@ -46,17 +46,17 @@ jobs:
run: |
cd arrow
@rem We want to use only
- @rem archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2017
+ @rem archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2019
@rem but it doesn't use pulled caches.
@rem It always build an image from scratch.
@rem We can remove this workaround once we find a way to use
@rem pulled caches when build an image.
echo on
- archery docker pull --no-ignore-pull-failures python-wheel-windows-vs2017
+ archery docker pull --no-ignore-pull-failures python-wheel-windows-vs2019
if errorlevel 1 (
- archery docker build --no-pull python-wheel-windows-vs2017 || exit /B 1
+ archery docker build --no-pull python-wheel-windows-vs2019 || exit /B 1
)
- archery docker run --no-build -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2017
+ archery docker run --no-build -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2019
- uses: actions/upload-artifact@v3
with:
@@ -77,5 +77,5 @@ jobs:
shell: cmd
run: |
cd arrow
- archery docker push python-wheel-windows-vs2017
+ archery docker push python-wheel-windows-vs2019
{% endif %}
diff --git a/docker-compose.yml b/docker-compose.yml
index 5c84d24fd7df7..8a7223b57632f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -172,7 +172,7 @@ x-hierarchy:
- python-wheel-manylinux-2-28
- python-wheel-manylinux-test-imports
- python-wheel-manylinux-test-unittests
- - python-wheel-windows-vs2017
+ - python-wheel-windows-vs2019
- python-wheel-windows-test
volumes:
@@ -1098,19 +1098,19 @@ services:
CHECK_UNITTESTS: "ON"
command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow
- python-wheel-windows-vs2017:
- image: ${REPO}:python-${PYTHON}-wheel-windows-vs2017-vcpkg-${VCPKG}-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION}
+ python-wheel-windows-vs2019:
+ image: ${REPO}:python-${PYTHON}-wheel-windows-vs2019-vcpkg-${VCPKG}-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION}
build:
args:
vcpkg: ${VCPKG}
python: ${PYTHON}
context: .
- dockerfile: ci/docker/python-wheel-windows-vs2017.dockerfile
+ dockerfile: ci/docker/python-wheel-windows-vs2019.dockerfile
# This should make the pushed images reusable, but the image gets rebuilt.
# Uncomment if no local cache is available.
# cache_from:
- # - abrarov/msvc-2017:2.11.0
- # - ${REPO}:python-${PYTHON}-wheel-windows-vs2017-vcpkg-${VCPKG}-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION}
+ # - abrarov/msvc-2019:2.11.0
+ # - ${REPO}:python-${PYTHON}-wheel-windows-vs2019-vcpkg-${VCPKG}-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION}
volumes:
- "${DOCKER_VOLUME_PREFIX}python-wheel-windows-clcache:C:/clcache"
- type: bind
@@ -1119,12 +1119,12 @@ services:
command: arrow\\ci\\scripts\\python_wheel_windows_build.bat
python-wheel-windows-test:
- image: ${REPO}:python-${PYTHON}-wheel-windows-test-vs2017-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION}
+ image: ${REPO}:python-${PYTHON}-wheel-windows-test-vs2019-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION}
build:
args:
python: ${PYTHON}
context: .
- dockerfile: ci/docker/python-wheel-windows-test-vs2017.dockerfile
+ dockerfile: ci/docker/python-wheel-windows-test-vs2019.dockerfile
volumes:
- "${DOCKER_VOLUME_PREFIX}python-wheel-windows-clcache:C:/clcache"
- type: bind
From 874e59670773bd0d52d9c6811483c00abc5ee736 Mon Sep 17 00:00:00 2001
From: Alenka Frim
Date: Tue, 6 Feb 2024 01:54:13 +0100
Subject: [PATCH 46/74] GH-39737: [Release][Docs] Update post release
documentation task (#39762)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This PR updates the `dev/release/post-08-docs.sh` task so that
- `DOCUMENTATION_OPTIONS.theme_switcher_version_match` changes from `""` to `"{previous_version}"`
- `DOCUMENTATION_OPTIONS.show_version_warning_banner` changes from `false` to `true`
for the documentation that is moved to a subfolder when a new major release is done.
* Closes: #39737
Lead-authored-by: AlenkaF
Co-authored-by: Alenka Frim
Co-authored-by: Raúl Cumplido
Co-authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
dev/release/post-08-docs.sh | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/dev/release/post-08-docs.sh b/dev/release/post-08-docs.sh
index f18f7d10c73e6..4df574700e812 100755
--- a/dev/release/post-08-docs.sh
+++ b/dev/release/post-08-docs.sh
@@ -86,6 +86,21 @@ if [ "$is_major_release" = "yes" ] ; then
fi
git add docs
git commit -m "[Website] Update documentations for ${version}"
+
+# Update DOCUMENTATION_OPTIONS.theme_switcher_version_match and
+# DOCUMENTATION_OPTIONS.show_version_warning_banner
+pushd docs/${previous_series}
+find ./ \
+ -type f \
+ -exec \
+ sed -i.bak \
+ -e "s/DOCUMENTATION_OPTIONS.theme_switcher_version_match = '';/DOCUMENTATION_OPTIONS.theme_switcher_version_match = '${previous_version}';/g" \
+ -e "s/DOCUMENTATION_OPTIONS.show_version_warning_banner = false/DOCUMENTATION_OPTIONS.show_version_warning_banner = true/g" \
+ {} \;
+find ./ -name '*.bak' -delete
+popd
+git add docs/${previous_series}
+git commit -m "[Website] Update warning banner for ${previous_series}"
git clean -d -f -x
popd
From 062c841836642ab95b1ffde031d271fffd29987d Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Tue, 6 Feb 2024 10:56:16 +0900
Subject: [PATCH 47/74] GH-39057: [CI][C++][Go] Don't run jobs that use a
self-hosted GitHub Actions Runner on fork (#39903)
### Rationale for this change
If jobs that use a self-hosted GitHub Actions Runner are submitted on a fork, they will eventually time out and report noisy failure notifications.
### What changes are included in this PR?
We can't use `jobs..if` to reject jobs that use self-hosted GitHub Actions Runner because `jobs..if` is evaluated before `jobs..strategy.matrix`.
https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idif
> Note: The `jobs..if` condition is evaluated before
> `jobs..strategy.matrix` is applied.
We can use output `jobs.outputs` instead. See also:
* https://docs.github.com/en/actions/using-jobs/defining-outputs-for-jobs
* https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idoutputs
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* Closes: #39057
Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
.github/workflows/cpp.yml | 74 +++++++++++++++++++++++++++------------
.github/workflows/go.yml | 65 +++++++++++++++++++++++++---------
2 files changed, 99 insertions(+), 40 deletions(-)
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 9fbad06692bd2..e9409f1cd6248 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -57,37 +57,65 @@ env:
DOCKER_VOLUME_PREFIX: ".docker/"
jobs:
+ docker-targets:
+ name: Docker targets
+ runs-on: ubuntu-latest
+ outputs:
+ targets: ${{ steps.detect-targets.outputs.targets }}
+ steps:
+ - name: Detect targets
+ id: detect-targets
+ run: |
+ echo "targets<> "$GITHUB_OUTPUT"
+ echo "[" >> "$GITHUB_OUTPUT"
+ cat <> "$GITHUB_OUTPUT"
+ {
+ "arch": "amd64",
+ "clang-tools": "14",
+ "image": "conda-cpp",
+ "llvm": "14",
+ "runs-on": "ubuntu-latest",
+ "simd-level": "AVX2",
+ "title": "AMD64 Conda C++ AVX2",
+ "ubuntu": "22.04"
+ },
+ {
+ "arch": "amd64",
+ "clang-tools": "14",
+ "image": "ubuntu-cpp-sanitizer",
+ "llvm": "14",
+ "runs-on": "ubuntu-latest",
+ "title": "AMD64 Ubuntu 22.04 C++ ASAN UBSAN",
+ "ubuntu": "22.04"
+ }
+ JSON
+ if [ "$GITHUB_REPOSITORY_OWNER" = "apache" ]; then
+ echo "," >> "$GITHUB_OUTPUT"
+ cat <> "$GITHUB_OUTPUT"
+ {
+ "arch": "arm64v8",
+ "clang-tools": "10",
+ "image": "ubuntu-cpp",
+ "llvm": "10",
+ "runs-on": ["self-hosted", "arm", "linux"],
+ "title": "ARM64 Ubuntu 20.04 C++",
+ "ubuntu": "20.04"
+ }
+ JSON
+ fi
+ echo "]" >> "$GITHUB_OUTPUT"
+ echo "JSON" >> "$GITHUB_OUTPUT"
+
docker:
name: ${{ matrix.title }}
+ needs: docker-targets
runs-on: ${{ matrix.runs-on }}
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 75
strategy:
fail-fast: false
matrix:
- include:
- - arch: amd64
- clang-tools: "14"
- image: conda-cpp
- llvm: "14"
- runs-on: ubuntu-latest
- simd-level: AVX2
- title: AMD64 Conda C++ AVX2
- ubuntu: "22.04"
- - arch: amd64
- clang-tools: "14"
- image: ubuntu-cpp-sanitizer
- llvm: "14"
- runs-on: ubuntu-latest
- title: AMD64 Ubuntu 22.04 C++ ASAN UBSAN
- ubuntu: "22.04"
- - arch: arm64v8
- clang-tools: "10"
- image: ubuntu-cpp
- llvm: "10"
- runs-on: ["self-hosted", "arm", "linux"]
- title: ARM64 Ubuntu 20.04 C++
- ubuntu: "20.04"
+ include: ${{ fromJson(needs.docker-targets.outputs.targets) }}
env:
ARCH: ${{ matrix.arch }}
ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index cd44e65e8811b..bbffab6704087 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -43,31 +43,62 @@ permissions:
jobs:
+ docker-targets:
+ name: Docker targets
+ runs-on: ubuntu-latest
+ if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+ outputs:
+ targets: ${{ steps.detect-targets.outputs.targets }}
+ steps:
+ - name: Detect targets
+ id: detect-targets
+ run: |
+ echo "targets<> "$GITHUB_OUTPUT"
+ echo "[" >> "$GITHUB_OUTPUT"
+ cat <> "$GITHUB_OUTPUT"
+ {
+ "arch-label": "AMD64",
+ "arch": "amd64",
+ "go": "1.19",
+ "runs-on": "ubuntu-latest"
+ },
+ {
+ "arch-label": "AMD64",
+ "arch": "amd64",
+ "go": "1.20",
+ "runs-on": "ubuntu-latest"
+ }
+ JSON
+ if [ "$GITHUB_REPOSITORY_OWNER" = "apache" ]; then
+ echo "," >> "$GITHUB_OUTPUT"
+ cat <> "$GITHUB_OUTPUT"
+ {
+ "arch-label": "ARM64",
+ "arch": "arm64v8",
+ "go": "1.19",
+ "runs-on": ["self-hosted", "arm", "linux"]
+ },
+ {
+ "arch-label": "ARM64",
+ "arch": "arm64v8",
+ "go": "1.20",
+ "runs-on": ["self-hosted", "arm", "linux"]
+ }
+ JSON
+ fi
+ echo "]" >> "$GITHUB_OUTPUT"
+ echo "JSON" >> "$GITHUB_OUTPUT"
+
docker:
name: ${{ matrix.arch-label }} Debian 11 Go ${{ matrix.go }}
+ needs: docker-targets
runs-on: ${{ matrix.runs-on }}
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
- include:
- - arch-label: AMD64
- arch: amd64
- go: 1.19
- runs-on: ubuntu-latest
- - arch-label: AMD64
- arch: amd64
- go: '1.20'
- runs-on: ubuntu-latest
- - arch-label: ARM64
- arch: arm64v8
- go: 1.19
- runs-on: ["self-hosted", "arm", "linux"]
- - arch-label: ARM64
- arch: arm64v8
- go: '1.20'
- runs-on: ["self-hosted", "arm", "linux"]
+ include: ${{ fromJson(needs.docker-targets.outputs.targets) }}
env:
ARCH: ${{ matrix.arch }}
GO: ${{ matrix.go }}
From f38ae607983264dc52a938d1930916c73292a92e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Feb 2024 11:03:44 +0900
Subject: [PATCH 48/74] MINOR: [Java] Bump com.fasterxml.jackson:jackson-bom
from 2.16.0 to 2.16.1 in /java (#39947)
Bumps [com.fasterxml.jackson:jackson-bom](https://github.com/FasterXML/jackson-bom) from 2.16.0 to 2.16.1.
Commits
f70e9cf
[maven-release-plugin] prepare release jackson-bom-2.16.1
22a8c3a
Prepare for 2.16.1 release
4203816
back to snapshot deps
4fb9d50
[maven-release-plugin] prepare for next development iteration
- See full diff in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.fasterxml.jackson:jackson-bom&package-manager=maven&previous-version=2.16.0&new-version=2.16.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/java/pom.xml b/java/pom.xml
index b2b300b2f3fed..1faeb46d02afc 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -36,7 +36,7 @@
4.1.106.Final
1.60.0
3.23.1
- 2.16.0
+ 2.16.1
3.3.6
23.5.26
1.11.3
From 1950f8000fa25368602b530dbec4b3d286aed819 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Feb 2024 11:04:08 +0900
Subject: [PATCH 49/74] MINOR: [Java] Bump org.cyclonedx:cyclonedx-maven-plugin
from 2.7.10 to 2.7.11 in /java (#39948)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bumps [org.cyclonedx:cyclonedx-maven-plugin](https://github.com/CycloneDX/cyclonedx-maven-plugin) from 2.7.10 to 2.7.11.
Release notes
Sourced from org.cyclonedx:cyclonedx-maven-plugin's releases.
2.7.11
🚀 New features and improvements
📦 Dependency updates
- define plugin-tools.version property (#453)
@hboutemy
- Bump org.apache.maven.plugin-tools:maven-plugin-annotations from 3.10.2 to 3.11.0 (#451)
@dependabot
- Bump org.apache.maven.plugins:maven-plugin-report-plugin from 3.10.2 to 3.11.0 (#450)
@dependabot
- Bump org.apache.maven.plugins:maven-plugin-plugin from 3.10.2 to 3.11.0 (#449)
@dependabot
- Bump org.apache.maven.plugins:maven-compiler-plugin from 3.11.0 to 3.12.1 (#447)
@dependabot
- Bump org.apache.maven.plugins:maven-plugin-plugin from 3.10.1 to 3.10.2 (#445)
@dependabot
- Bump org.apache.maven.plugins:maven-project-info-reports-plugin from 3.4.5 to 3.5.0 (#442)
@dependabot
- Bump org.apache.commons:commons-lang3 from 3.13.0 to 3.14.0 (#443)
@dependabot
- Bump org.apache.maven.plugin-tools:maven-plugin-annotations from 3.10.1 to 3.10.2 (#444)
@dependabot
- Bump org.junit:junit-bom from 5.10.0 to 5.10.1 (#422)
@dependabot
- Bump org.apache.maven.plugins:maven-plugin-report-plugin from 3.10.1 to 3.10.2 (#424)
@dependabot
- Bump org.apache.maven.plugins:maven-javadoc-plugin from 3.6.0 to 3.6.3 (#438)
@dependabot
- Bump actions/setup-java from 3 to 4 (#437)
@dependabot
- Bump org.apache.maven.plugins:maven-plugin-report-plugin from 3.9.0 to 3.10.1 (#417)
@dependabot
Commits
349fe7c
[maven-release-plugin] prepare release cyclonedx-maven-plugin-2.7.11
2d130a0
rename convert methohds to explicit project vs dependency
051be8e
cleanup unused code
d0e6cb5
test dependency type=zip for #431 (reverts #9)
46837cd
Update DefaultModelConverter.java to support Zip files
dc90b21
define plugin-tools.version property
8836cbd
Add support for custom external references (#428)
86410aa
Bump org.apache.maven.plugin-tools:maven-plugin-annotations
4d71b50
Bump org.apache.maven.plugins:maven-plugin-report-plugin
70aae8e
Bump org.apache.maven.plugins:maven-plugin-plugin from 3.10.2 to 3.11.0
- Additional commits viewable in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.cyclonedx:cyclonedx-maven-plugin&package-manager=maven&previous-version=2.7.10&new-version=2.7.11)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/maven/pom.xml | 2 +-
java/pom.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/java/maven/pom.xml b/java/maven/pom.xml
index 7fdca7db7b8d8..9842777c15495 100644
--- a/java/maven/pom.xml
+++ b/java/maven/pom.xml
@@ -271,7 +271,7 @@
org.cyclonedx
cyclonedx-maven-plugin
- 2.7.10
+ 2.7.11
package
diff --git a/java/pom.xml b/java/pom.xml
index 1faeb46d02afc..e928960182ab2 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -364,7 +364,7 @@
org.cyclonedx
cyclonedx-maven-plugin
- 2.7.10
+ 2.7.11
package
From c7a166fc5aeec3f1b6e5d68cc7746b228a8dad04 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Feb 2024 11:04:37 +0900
Subject: [PATCH 50/74] MINOR: [Java] Bump
org.apache.maven.plugins:maven-project-info-reports-plugin from 3.0.0 to
3.5.0 in /java (#39949)
Bumps [org.apache.maven.plugins:maven-project-info-reports-plugin](https://github.com/apache/maven-project-info-reports-plugin) from 3.0.0 to 3.5.0.
Commits
28ac4b5
[maven-release-plugin] prepare release maven-project-info-reports-plugin-3.5.0
9c4fc33
Remove unneeded incompatiblity notice
482ea62
Fix formatting
60cfdea
[MPIR-453] Replace Commons IO in favor of standard APIs
4d94edc
[MPIR-446] Update to Maven SCM 2.0.
91a065b
[MPIR-452] Upgrade to Parent 41
ceac0bf
Consistently use MavenReport#getReportOutputDirectory()
c16ec94
[MNG-7416] Simplify Boolean expressions and returns (#63)
abd0e76
Fix style value
18aedbb
Reduce IT runtime by invoking goal directly
- Additional commits viewable in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-project-info-reports-plugin&package-manager=maven&previous-version=3.0.0&new-version=3.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/maven/pom.xml | 2 +-
java/pom.xml | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/java/maven/pom.xml b/java/maven/pom.xml
index 9842777c15495..c2b13119fc440 100644
--- a/java/maven/pom.xml
+++ b/java/maven/pom.xml
@@ -333,7 +333,7 @@
org.apache.maven.plugins
maven-project-info-reports-plugin
- 3.0.0
+ 3.5.0
org.apache.maven.plugins
diff --git a/java/pom.xml b/java/pom.xml
index e928960182ab2..258e45a519c59 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -395,7 +395,7 @@
org.apache.maven.plugins
maven-project-info-reports-plugin
- 3.0.0
+ 3.5.0
org.apache.maven.plugins
@@ -598,7 +598,7 @@
org.apache.maven.plugins
maven-project-info-reports-plugin
- 3.0.0
+ 3.5.0
org.apache.maven.plugins
@@ -803,7 +803,7 @@
org.apache.maven.plugins
maven-project-info-reports-plugin
- 3.0.0
+ 3.5.0
org.apache.maven.plugins
From 672238ff6352fa388b54182d8ae1667f9e99c327 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Feb 2024 11:05:00 +0900
Subject: [PATCH 51/74] MINOR: [Java] Bump io.grpc:grpc-bom from 1.60.0 to
1.61.1 in /java (#39950)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bumps [io.grpc:grpc-bom](https://github.com/grpc/grpc-java) from 1.60.0 to 1.61.1.
Release notes
Sourced from io.grpc:grpc-bom's releases.
v1.61.1
Bug Fixes
xds: Fix a bug in WeightedRoundRobinLoadBalancer
policy that could raise NullPointerException
and further cause channel panic when picking a subchannel. This bug can only be triggered when connection can not be established and the channel reports TRANSIENT_FAILURE
state. (#10868)
v1.61.0
API Changes
- Remove unused experimental API ManagedChannelBuilder.enableFullStreamDecompression (#10744)
- api: Deprecate LoadBalancer.EMPTY_PICKER added in 1.58.0 in favor of FixedResultPicker (860b5cb1f)
New Features
- binder: Experimental support for asynchronous security policies (#10566)
Improvements
- core: reduce CompositeReadableBuffer allocation (#3279)
- core: Improve error message clarity when a channel leak is detected (201893f5e)
- util: use shared index across
round_robin
pickers (dca89b25b). This makes its implementation more similar to weighted_round_robin
.
- xds: Implement ADS stream flow control mechanism (#10674). This limits the maximum memory consumed if the control plane sends updates more rapidly than they can be processed.
Bug Fixes
- core: Check outbound maximum message size for the compressed size in addition to the already-checked uncompressed size (#10739). Fixed the status code to be RESOURCE_EXHAUSTED instead of UNKNOWN.
- util: Fix NPE when multiple addresses are in an address group for petiole load balancer policies (#10769)
- util: Disable publishing of fixtures (8ac43dd81). The Gradle test fixtures are for use by grpc-java's internal tests.
- okhttp: Ignore known conscrypt socket close issue (#10812). This stops an exception from being thrown when a known Conscrypt synchronization issue happens.
Dependencies
- Drop support for Bazel 5 (55a9c012c). Bazel 7 is available, and Protobuf has already dropped support for Bazel 5.
- Change many compile deps to runtime deps (d6830d7f9). This reduces the transitive classes "leaked" into the compile classpath. In particular, grpc-core (
io.grpc.internal
) will be less frequently included transitively at compile time.
- Upgrade dependencies (c985797d9)
- Protobuf to 3.25.1
- auto-value-annotations to 1.10.4
- error_prone_annotations to 2.23.0
- proto-google-common-protos to 2.29.0
- google-cloud-logging to 3.15.14
- guava to 32.1.3-android
- okio to 3.4.0
Acknowledgements
v1.60.2
Bug Fixes
xds: Fix a bug in WeightedRoundRobinLoadBalancer
policy that could raise NullPointerException
and further cause channel panic when picking a subchannel. This bug can only be triggered when connection can not be established and the channel reports TRANSIENT_FAILURE
state. (#10868)
v1.60.1
Bug Fixes
- util: Fix NPE when multiple addresses in an address group for petiole load balancer policies (#10770)
Commits
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=io.grpc:grpc-bom&package-manager=maven&previous-version=1.60.0&new-version=1.61.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei
---
java/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/java/pom.xml b/java/pom.xml
index 258e45a519c59..6442987f5a192 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -34,7 +34,7 @@
2.0.11
33.0.0-jre
4.1.106.Final
- 1.60.0
+ 1.61.1
3.23.1
2.16.1
3.3.6
From 0993b369c4b91d81a17166d1427e7c26cd9beee4 Mon Sep 17 00:00:00 2001
From: david dali susanibar arce
Date: Mon, 5 Feb 2024 21:35:34 -0500
Subject: [PATCH 52/74] GH-39900: [Java][CI] To upload Maven and Memory Netty
Buffer Patch into Apache Nightly repository (#39901)
### Rationale for this change
To upload Maven and Memory Netty Buffer Patch into Apache Nightly repository
### What changes are included in this PR?
Upload Maven and Memory Netty Buffer Patch into Apache Nightly repository
### Are these changes tested?
Needed to run https://github.com/apache/arrow/actions/workflows/java_nightly.yml
### Are there any user-facing changes?
No
* Closes: #39900
Authored-by: david dali susanibar arce
Signed-off-by: Sutou Kouhei
---
dev/tasks/tasks.yml | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 0f8c58391fa66..cf04d29715306 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -748,6 +748,10 @@ tasks:
- arrow-jdbc-{no_rc_snapshot_version}-tests.jar
- arrow-jdbc-{no_rc_snapshot_version}.jar
- arrow-jdbc-{no_rc_snapshot_version}.pom
+ - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.json
+ - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.xml
+ - arrow-maven-plugins-{no_rc_snapshot_version}-src.zip
+ - arrow-maven-plugins-{no_rc_snapshot_version}.pom
- arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.json
- arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.xml
- arrow-memory-core-{no_rc_snapshot_version}-javadoc.jar
@@ -762,6 +766,13 @@ tasks:
- arrow-memory-netty-{no_rc_snapshot_version}-tests.jar
- arrow-memory-netty-{no_rc_snapshot_version}.jar
- arrow-memory-netty-{no_rc_snapshot_version}.pom
+ - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-cyclonedx.json
+ - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-cyclonedx.xml
+ - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-javadoc.jar
+ - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-sources.jar
+ - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-tests.jar
+ - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}.jar
+ - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}.pom
- arrow-memory-unsafe-{no_rc_snapshot_version}-cyclonedx.json
- arrow-memory-unsafe-{no_rc_snapshot_version}-cyclonedx.xml
- arrow-memory-unsafe-{no_rc_snapshot_version}-javadoc.jar
@@ -839,6 +850,13 @@ tasks:
- flight-sql-jdbc-driver-{no_rc_snapshot_version}-tests.jar
- flight-sql-jdbc-driver-{no_rc_snapshot_version}.jar
- flight-sql-jdbc-driver-{no_rc_snapshot_version}.pom
+ - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.json
+ - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.xml
+ - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-javadoc.jar
+ - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-sources.jar
+ - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-src.zip
+ - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.jar
+ - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.pom
############################## NuGet packages ###############################
From cd5faafb0c811d5985156c1fd1aecd1aa7130e9f Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Tue, 6 Feb 2024 13:53:54 +0900
Subject: [PATCH 53/74] GH-39955: [C++] Use make -j1 to install bundled bzip2
(#39956)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
### Rationale for this change
It seems that parallel "make install" isn't stable with "-G 'Unix Makefiles'" ("read jobs pipe: Bad file descriptor. Stop." is the important part):
[ 19%] Performing install step for 'bzip2_ep'
CMake Error at /tmp/Rtmp5v99SJ/file70b591df48f/bzip2_ep-prefix/src/bzip2_ep-stamp/bzip2_ep-install-RELEASE.cmake:37 (message):
Command failed: 2
'/bin/make' 'install' 'PREFIX=/tmp/Rtmp5v99SJ/file70b591df48f/bzip2_ep-install' 'CC=/bin/gcc' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/r-base-4.3.2=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC' 'AR=/bin/ar' 'RANLIB=/bin/ranlib'
See also
/tmp/Rtmp5v99SJ/file70b591df48f/bzip2_ep-prefix/src/bzip2_ep-stamp/bzip2_ep-install-*.log
-- stdout output is:
-- stderr output is:
make[3]: *** read jobs pipe: Bad file descriptor. Stop.
make[3]: *** Waiting for unfinished jobs....
bzip2.c: In function ‘applySavedFileAttrToOutputFile’:
bzip2.c:1073:11: warning: ignoring return value of ‘fchown’ declared with attribute ‘warn_unused_result’ [-Wunused-result]
1073 | (void) fchown ( fd, fileMetaInfo.st_uid, fileMetaInfo.st_gid );
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CMake Error at /tmp/Rtmp5v99SJ/file70b591df48f/bzip2_ep-prefix/src/bzip2_ep-stamp/bzip2_ep-install-RELEASE.cmake:47 (message):
Stopping after outputting logs.
make[2]: *** [CMakeFiles/bzip2_ep.dir/build.make:104: bzip2_ep-prefix/src/bzip2_ep-stamp/bzip2_ep-install] Error 1
make[1]: *** [CMakeFiles/Makefile2:1205: CMakeFiles/bzip2_ep.dir/all] Error 2
make[1]: *** Waiting for unfinished jobs....
### What changes are included in this PR?
Force to disable parallel processing for `make install`.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* Closes: #39955
Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 0238c26c0fb51..b16ee07756013 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2629,7 +2629,7 @@ macro(build_bzip2)
BUILD_IN_SOURCE 1
BUILD_COMMAND ${MAKE} libbz2.a ${MAKE_BUILD_ARGS}
${BZIP2_EXTRA_ARGS}
- INSTALL_COMMAND ${MAKE} install PREFIX=${BZIP2_PREFIX}
+ INSTALL_COMMAND ${MAKE} install -j1 PREFIX=${BZIP2_PREFIX}
${BZIP2_EXTRA_ARGS}
INSTALL_DIR ${BZIP2_PREFIX}
URL ${ARROW_BZIP2_SOURCE_URL}
From a6e577d031d20a1a7d3dd01536b9a77db5d1bff8 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Tue, 6 Feb 2024 16:19:03 +0100
Subject: [PATCH 54/74] GH-39857: [C++] Improve error message for "chunker out
of sync" condition (#39892)
### Rationale for this change
When writing the CSV reader, we thought that the parser not finding the same line limits as the chunker should never happen, hence the terse "chunker out of sync" error message.
It turns out that, if the input contains multiline cell values and the `newlines_in_values` option was not enabled, the chunker can happily delimit a block on a newline that's inside a quoted string. The parser will then see truncated data and will stop parsing, yielding a parsed size that's smaller than the first block (see added comment in the code).
### What changes are included in this PR?
* Add some parser tests that showcase the condition encountered in GH-39857
* Improve error message to guide users towards the solution
### Are these changes tested?
There's no functional change, the error message itself isn't tested.
### Are there any user-facing changes?
No.
* Closes: #39857
Authored-by: Antoine Pitrou
Signed-off-by: Antoine Pitrou
---
cpp/src/arrow/csv/parser_test.cc | 22 +++++++++++++++++++++
cpp/src/arrow/csv/reader.cc | 34 +++++++++++++++++++++++++++-----
python/pyarrow/tests/test_csv.py | 25 +++++++++++++++++++++++
3 files changed, 76 insertions(+), 5 deletions(-)
diff --git a/cpp/src/arrow/csv/parser_test.cc b/cpp/src/arrow/csv/parser_test.cc
index 960a69c59db5d..dd3d025202018 100644
--- a/cpp/src/arrow/csv/parser_test.cc
+++ b/cpp/src/arrow/csv/parser_test.cc
@@ -175,6 +175,13 @@ void AssertParsePartial(BlockParser& parser, const std::string& str,
ASSERT_EQ(parsed_size, expected_size);
}
+void AssertParsePartial(BlockParser& parser, const std::vector& data,
+ uint32_t expected_size) {
+ uint32_t parsed_size = static_cast(-1);
+ ASSERT_OK(parser.Parse(data, &parsed_size));
+ ASSERT_EQ(parsed_size, expected_size);
+}
+
void AssertLastRowEq(const BlockParser& parser,
const std::vector& expected) {
std::vector values;
@@ -376,6 +383,21 @@ TEST(BlockParser, TruncatedData) {
}
}
+TEST(BlockParser, TruncatedDataViews) {
+ // The BlockParser API mandates that, when passing a vector of views,
+ // only the last view may be a truncated CSV block.
+ // In the current implementation, receiving a truncated non-last view
+ // simply stops parsing after that view.
+ BlockParser parser(ParseOptions::Defaults(), /*num_cols=*/3);
+ AssertParsePartial(parser, Views({"a,b,", "c\n"}), 0);
+ AssertParsePartial(parser, Views({"a,b,c\nd,", "e,f\n"}), 6);
+
+ // More sophisticated: non-last block ends on some newline inside a quoted string
+ // (terse reproducer of gh-39857)
+ AssertParsePartial(parser, Views({"a,b,\"c\n", "\"\n"}), 0);
+ AssertParsePartial(parser, Views({"a,b,c\n\"d\n", "\",e,f\n"}), 6);
+}
+
TEST(BlockParser, Final) {
// Tests for ParseFinal()
BlockParser parser(ParseOptions::Defaults());
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 332fad054fea3..1ac25e290a814 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -261,11 +261,10 @@ class SerialBlockReader : public BlockReader {
auto consume_bytes = [this, bytes_before_buffer,
next_buffer](int64_t nbytes) -> Status {
DCHECK_GE(nbytes, 0);
- auto offset = nbytes - bytes_before_buffer;
- if (offset < 0) {
- // Should not happen
- return Status::Invalid("CSV parser got out of sync with chunker");
- }
+ int64_t offset = nbytes - bytes_before_buffer;
+ // All data before the buffer should have been consumed.
+ // This is checked in Parse() and BlockParsingOperator::operator().
+ DCHECK_GE(offset, 0);
partial_ = SliceBuffer(buffer_, offset);
buffer_ = next_buffer;
return Status::OK();
@@ -400,6 +399,7 @@ class BlockParsingOperator {
count_rows_(first_row >= 0),
num_rows_seen_(first_row) {}
+ // TODO: this is almost entirely the same as ReaderMixin::Parse(). Refactor?
Result operator()(const CSVBlock& block) {
constexpr int32_t max_num_rows = std::numeric_limits::max();
auto parser = std::make_shared(
@@ -427,9 +427,24 @@ class BlockParsingOperator {
} else {
RETURN_NOT_OK(parser->Parse(views, &parsed_size));
}
+
+ // `partial + completion` should have been entirely consumed.
+ const int64_t bytes_before_buffer = block.partial->size() + block.completion->size();
+ if (static_cast(parsed_size) < bytes_before_buffer) {
+ // This can happen if `newlines_in_values` is not enabled and
+ // `partial + completion` ends with a newline inside a quoted string.
+ // In this case, the BlockParser stops at the truncated data in the first
+ // block (see gh-39857).
+ return Status::Invalid(
+ "CSV parser got out of sync with chunker. This can mean the data file "
+ "contains cell values spanning multiple lines; please consider enabling "
+ "the option 'newlines_in_values'.");
+ }
+
if (count_rows_) {
num_rows_seen_ += parser->total_num_rows();
}
+
RETURN_NOT_OK(block.consume_bytes(parsed_size));
return ParsedBlock{std::move(parser), block.block_index,
static_cast(parsed_size) + block.bytes_skipped};
@@ -738,6 +753,15 @@ class ReaderMixin {
} else {
RETURN_NOT_OK(parser->Parse(views, &parsed_size));
}
+ // See BlockParsingOperator for explanation.
+ const int64_t bytes_before_buffer = partial->size() + completion->size();
+ if (static_cast(parsed_size) < bytes_before_buffer) {
+ return Status::Invalid(
+ "CSV parser got out of sync with chunker. This can mean the data file "
+ "contains cell values spanning multiple lines; please consider enabling "
+ "the option 'newlines_in_values'.");
+ }
+
if (count_rows_) {
num_rows_seen_ += parser->total_num_rows();
}
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 31f24187e3b37..bc1dd8a09a768 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -667,6 +667,31 @@ def row_num(x):
'b': ["e", "j"],
}
+ def test_chunker_out_of_sync(self):
+ # GH-39892: if there are newlines in values, the parser may become
+ # out of sync with the chunker. In this case, we try to produce an
+ # informative error message.
+ rows = b"""a,b,c\nd,e,"f\n"\ng,h,i\n"""
+ expected = {
+ 'a': ["d", "g"],
+ 'b': ["e", "h"],
+ 'c': ["f\n", "i"],
+ }
+ for block_size in range(8, 15):
+ # Sanity check: parsing works with newlines_in_values=True
+ d = self.read_bytes(
+ rows, parse_options=ParseOptions(newlines_in_values=True),
+ read_options=ReadOptions(block_size=block_size)).to_pydict()
+ assert d == expected
+ # With these block sizes, a block would end on the physical newline
+ # inside the quoted cell value, leading to a mismatch between
+ # CSV chunker and parser.
+ for block_size in range(8, 11):
+ with pytest.raises(ValueError,
+ match="cell values spanning multiple lines"):
+ self.read_bytes(
+ rows, read_options=ReadOptions(block_size=block_size))
+
class BaseCSVTableRead(BaseTestCSV):
From 0a05626f08836152526babf103aec95d0e4ec507 Mon Sep 17 00:00:00 2001
From: Jeffrey Vo
Date: Thu, 8 Feb 2024 00:01:46 +1100
Subject: [PATCH 55/74] MINOR: [Rust][Docs] Update Rust FlightSQL status doc
(#39959)
### Rationale for this change
Updating arrow-rs FlightSQL support status on site:
https://arrow.apache.org/docs/status.html#flight-sql
arrow-rs issue: https://github.com/apache/arrow-rs/issues/4337
### What changes are included in this PR?
### Are these changes tested?
### Are there any user-facing changes?
---
docs/source/status.rst | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 11dd9c2c2965c..a0375585dbee2 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -257,9 +257,9 @@ support/not support individual features.
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| CancelQuery | ✓ | ✓ | | | | | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| ClosePreparedStatement | ✓ | ✓ | ✓ | | ✓ | | | |
+| ClosePreparedStatement | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| CreatePreparedStatement | ✓ | ✓ | ✓ | | ✓ | | | |
+| CreatePreparedStatement | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| CreatePreparedSubstraitPlan | ✓ | ✓ | | | | | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
@@ -267,35 +267,35 @@ support/not support individual features.
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| EndTransaction | ✓ | ✓ | | | | | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetCatalogs | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetCatalogs | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetCrossReference | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetCrossReference | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetDbSchemas | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetDbSchemas | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetExportedKeys | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetExportedKeys | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetImportedKeys | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetImportedKeys | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetPrimaryKeys | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetPrimaryKeys | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetSqlInfo | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetSqlInfo | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetTables | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetTables | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetTableTypes | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetTableTypes | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| GetXdbcTypeInfo | ✓ | ✓ | ✓ | | ✓ | | | |
+| GetXdbcTypeInfo | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| PreparedStatementQuery | ✓ | ✓ | ✓ | | ✓ | | | |
+| PreparedStatementQuery | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| PreparedStatementUpdate | ✓ | ✓ | ✓ | | ✓ | | | |
+| PreparedStatementUpdate | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| StatementSubstraitPlan | ✓ | ✓ | | | | | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| StatementQuery | ✓ | ✓ | ✓ | | ✓ | | | |
+| StatementQuery | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| StatementUpdate | ✓ | ✓ | ✓ | | ✓ | | | |
+| StatementUpdate | ✓ | ✓ | ✓ | | ✓ | ✓ | | |
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+
.. seealso::
From c9f6e04323a0b714487a0f707b46fc3c55b909e0 Mon Sep 17 00:00:00 2001
From: Adam Reeve
Date: Thu, 8 Feb 2024 02:32:16 +1300
Subject: [PATCH 56/74] GH-24834: [C#] Support writing compressed IPC data
(#39871)
### Rationale for this change
This allows using compression when writing IPC streams and files with the Arrow .NET library
### What changes are included in this PR?
* Adds a compress method to the `ICompressionCodec` interface and implements this for Zstd and LZ4Frame in the `Apache.Arrow.Compression` package
* Adds new compression related options to `IpcOptions`
* Implements buffer compression in `ArrowStreamWriter`
### Are these changes tested?
Yes, new unit tests have been added
### Are there any user-facing changes?
Yes, this is a new user-facing feature and the `status.rst` and `csharp/README` files have been updated
* Closes: #24834
Authored-by: Adam Reeve
Signed-off-by: Curt Hagenlocher
---
csharp/README.md | 8 +-
.../CompressionCodecFactory.cs | 9 +-
.../Lz4CompressionCodec.cs | 32 ++-
.../ZstdCompressionCodec.cs | 22 ++-
.../src/Apache.Arrow/Ipc/ArrowFileWriter.cs | 10 +-
.../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 117 +++++++++--
.../src/Apache.Arrow/Ipc/ICompressionCodec.cs | 15 ++
.../Ipc/ICompressionCodecFactory.cs | 21 ++
csharp/src/Apache.Arrow/Ipc/IpcOptions.cs | 17 ++
.../Apache.Arrow.Compression.Tests.csproj | 1 +
.../ArrowFileWriterTests.cs | 147 ++++++++++++++
.../ArrowStreamWriterTests.cs | 184 ++++++++++++++++++
.../Apache.Arrow.IntegrationTest.csproj | 1 +
.../IntegrationCommand.cs | 10 +-
dev/archery/archery/integration/runner.py | 1 -
docs/source/status.rst | 4 +-
16 files changed, 564 insertions(+), 35 deletions(-)
create mode 100644 csharp/test/Apache.Arrow.Compression.Tests/ArrowFileWriterTests.cs
create mode 100644 csharp/test/Apache.Arrow.Compression.Tests/ArrowStreamWriterTests.cs
diff --git a/csharp/README.md b/csharp/README.md
index 6e6ed9c756873..b36eb899db2d5 100644
--- a/csharp/README.md
+++ b/csharp/README.md
@@ -115,10 +115,10 @@ for currently available features.
### Compression
-- Buffer compression is not supported when writing IPC files or streams
-- Buffer decompression is supported, but requires installing the `Apache.Arrow.Compression` package,
- and passing an `Apache.Arrow.Compression.CompressionCodecFactory` instance to the
- `ArrowFileReader` or `ArrowStreamReader` constructor.
+- Buffer compression and decompression is supported, but requires installing the `Apache.Arrow.Compression` package.
+ When reading compressed data, you must pass an `Apache.Arrow.Compression.CompressionCodecFactory` instance to the
+ `ArrowFileReader` or `ArrowStreamReader` constructor, and when writing compressed data a
+ `CompressionCodecFactory` must be set in the `IpcOptions`.
Alternatively, a custom implementation of `ICompressionCodecFactory` can be used.
## Not Implemented
diff --git a/csharp/src/Apache.Arrow.Compression/CompressionCodecFactory.cs b/csharp/src/Apache.Arrow.Compression/CompressionCodecFactory.cs
index 3e0a537a89a8f..4bfcdf6544f9d 100644
--- a/csharp/src/Apache.Arrow.Compression/CompressionCodecFactory.cs
+++ b/csharp/src/Apache.Arrow.Compression/CompressionCodecFactory.cs
@@ -24,11 +24,16 @@ namespace Apache.Arrow.Compression
public sealed class CompressionCodecFactory : ICompressionCodecFactory
{
public ICompressionCodec CreateCodec(CompressionCodecType compressionCodecType)
+ {
+ return CreateCodec(compressionCodecType, null);
+ }
+
+ public ICompressionCodec CreateCodec(CompressionCodecType compressionCodecType, int? compressionLevel)
{
return compressionCodecType switch
{
- CompressionCodecType.Lz4Frame => Lz4CompressionCodec.Instance,
- CompressionCodecType.Zstd => new ZstdCompressionCodec(),
+ CompressionCodecType.Lz4Frame => new Lz4CompressionCodec(compressionLevel),
+ CompressionCodecType.Zstd => new ZstdCompressionCodec(compressionLevel),
_ => throw new NotImplementedException($"Compression type {compressionCodecType} is not supported")
};
}
diff --git a/csharp/src/Apache.Arrow.Compression/Lz4CompressionCodec.cs b/csharp/src/Apache.Arrow.Compression/Lz4CompressionCodec.cs
index ebbcfbc3e095f..df19c16a30213 100644
--- a/csharp/src/Apache.Arrow.Compression/Lz4CompressionCodec.cs
+++ b/csharp/src/Apache.Arrow.Compression/Lz4CompressionCodec.cs
@@ -14,17 +14,35 @@
// limitations under the License.
using System;
+using System.IO;
using Apache.Arrow.Ipc;
+using K4os.Compression.LZ4;
using K4os.Compression.LZ4.Streams;
namespace Apache.Arrow.Compression
{
internal sealed class Lz4CompressionCodec : ICompressionCodec
{
- ///
- /// Singleton instance, used as this class doesn't need to be disposed and has no state
- ///
- public static readonly Lz4CompressionCodec Instance = new Lz4CompressionCodec();
+ private readonly LZ4EncoderSettings _settings = null;
+
+ public Lz4CompressionCodec(int? compressionLevel = null)
+ {
+ if (compressionLevel.HasValue)
+ {
+ if (Enum.IsDefined(typeof(LZ4Level), compressionLevel))
+ {
+ _settings = new LZ4EncoderSettings
+ {
+ CompressionLevel = (LZ4Level) compressionLevel,
+ };
+ }
+ else
+ {
+ throw new ArgumentException(
+ $"Invalid LZ4 compression level ({compressionLevel})", nameof(compressionLevel));
+ }
+ }
+ }
public int Decompress(ReadOnlyMemory source, Memory destination)
{
@@ -32,6 +50,12 @@ public int Decompress(ReadOnlyMemory source, Memory destination)
return decoder.ReadManyBytes(destination.Span);
}
+ public void Compress(ReadOnlyMemory source, Stream destination)
+ {
+ using var encoder = LZ4Frame.Encode(destination, _settings, leaveOpen: true);
+ encoder.WriteManyBytes(source.Span);
+ }
+
public void Dispose()
{
}
diff --git a/csharp/src/Apache.Arrow.Compression/ZstdCompressionCodec.cs b/csharp/src/Apache.Arrow.Compression/ZstdCompressionCodec.cs
index 92c2e65371612..cc340a7cd1b9f 100644
--- a/csharp/src/Apache.Arrow.Compression/ZstdCompressionCodec.cs
+++ b/csharp/src/Apache.Arrow.Compression/ZstdCompressionCodec.cs
@@ -14,6 +14,7 @@
// limitations under the License.
using System;
+using System.IO;
using Apache.Arrow.Ipc;
using ZstdSharp;
@@ -22,10 +23,21 @@ namespace Apache.Arrow.Compression
internal sealed class ZstdCompressionCodec : ICompressionCodec
{
private readonly Decompressor _decompressor;
+ private readonly Compressor _compressor;
- public ZstdCompressionCodec()
+ public ZstdCompressionCodec(int? compressionLevel = null)
{
+ if (compressionLevel.HasValue &&
+ (compressionLevel.Value < Compressor.MinCompressionLevel ||
+ compressionLevel.Value > Compressor.MaxCompressionLevel))
+ {
+ throw new ArgumentException(
+ $"Zstd compression level must be between {Compressor.MinCompressionLevel} and {Compressor.MaxCompressionLevel}",
+ nameof(compressionLevel));
+ }
+
_decompressor = new Decompressor();
+ _compressor = new Compressor(compressionLevel ?? Compressor.DefaultCompressionLevel);
}
public int Decompress(ReadOnlyMemory source, Memory destination)
@@ -33,9 +45,17 @@ public int Decompress(ReadOnlyMemory source, Memory destination)
return _decompressor.Unwrap(source.Span, destination.Span);
}
+ public void Compress(ReadOnlyMemory source, Stream destination)
+ {
+ using var compressor = new CompressionStream(
+ destination, _compressor, preserveCompressor: true, leaveOpen: true);
+ compressor.Write(source.Span);
+ }
+
public void Dispose()
{
_decompressor.Dispose();
+ _compressor.Dispose();
}
}
}
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
index 547fa800ec71e..a643012bab1a2 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
@@ -20,6 +20,7 @@
using System.IO;
using System.Threading;
using System.Threading.Tasks;
+using Apache.Arrow.Memory;
namespace Apache.Arrow.Ipc
{
@@ -37,12 +38,17 @@ public ArrowFileWriter(Stream stream, Schema schema)
}
public ArrowFileWriter(Stream stream, Schema schema, bool leaveOpen)
- : this(stream, schema, leaveOpen, options: null)
+ : this(stream, schema, leaveOpen, options: null, allocator: null)
{
}
public ArrowFileWriter(Stream stream, Schema schema, bool leaveOpen, IpcOptions options)
- : base(stream, schema, leaveOpen, options)
+ : this(stream, schema, leaveOpen, options, allocator: null)
+ {
+ }
+
+ public ArrowFileWriter(Stream stream, Schema schema, bool leaveOpen, IpcOptions options, MemoryAllocator allocator)
+ : base(stream, schema, leaveOpen, options, allocator)
{
if (!stream.CanWrite)
{
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 07d1dcfdb171d..b002f8c8b1578 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -22,6 +22,7 @@
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Arrays;
+using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using Google.FlatBuffers;
@@ -29,7 +30,7 @@ namespace Apache.Arrow.Ipc
{
public class ArrowStreamWriter : IDisposable
{
- internal class ArrowRecordBatchFlatBufferBuilder :
+ private class ArrowRecordBatchFlatBufferBuilder :
IArrowArrayVisitor,
IArrowArrayVisitor,
IArrowArrayVisitor,
@@ -81,14 +82,21 @@ public Buffer(ArrowBuffer buffer, int offset)
}
private readonly List _buffers;
+ private readonly ICompressionCodec _compressionCodec;
+ private readonly MemoryAllocator _allocator;
+ private readonly MemoryStream _compressionStream;
public IReadOnlyList