From c2ca9bcedeb004f9d7f5d3e1aafc7b83ce6c1e3f Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Mon, 29 Jan 2024 15:39:10 -0500 Subject: [PATCH 01/74] GH-39837: [Go][Flight] Allow cloning existing cookies in middleware (#39838) ### Rationale for this change This is needed for https://github.com/apache/arrow-adbc/issues/1194 to facilitate better connection handling for flight clients in ADBC by copying the existing cookies over when creating a sub-client. ### What changes are included in this PR? Creating a `Clone` method on the `CookieMiddleware` so that a user can create and hold a reference to a specific cookie middleware instance and then create new ones on the fly that copy over the existing cookies at that moment. ### Are these changes tested? Yes. ### Are there any user-facing changes? No * Closes: #39837 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/arrow/flight/cookie_middleware.go | 24 +++++++++ go/arrow/flight/cookie_middleware_test.go | 60 +++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/go/arrow/flight/cookie_middleware.go b/go/arrow/flight/cookie_middleware.go index 27754a13b829a..39c86d8303434 100644 --- a/go/arrow/flight/cookie_middleware.go +++ b/go/arrow/flight/cookie_middleware.go @@ -23,6 +23,7 @@ import ( "sync" "time" + "golang.org/x/exp/maps" "google.golang.org/grpc/metadata" ) @@ -40,11 +41,34 @@ func NewClientCookieMiddleware() ClientMiddleware { return CreateClientMiddleware(&clientCookieMiddleware{jar: make(map[string]http.Cookie)}) } +func NewCookieMiddleware() CookieMiddleware { + return &clientCookieMiddleware{jar: make(map[string]http.Cookie)} +} + +// CookieMiddleware is a go-routine safe middleware for flight clients +// which properly handles Set-Cookie headers for storing cookies. +// This can be passed into `CreateClientMiddleware` to create a new +// middleware object. You can also clone it to create middleware for a +// new client which starts with the same cookies. +type CookieMiddleware interface { + CustomClientMiddleware + // Clone creates a new CookieMiddleware that starts out with the same + // cookies that this one already has. This is useful when creating a + // new client connection for the same server. 
+ Clone() CookieMiddleware +} + type clientCookieMiddleware struct { jar map[string]http.Cookie mx sync.Mutex } +func (cc *clientCookieMiddleware) Clone() CookieMiddleware { + cc.mx.Lock() + defer cc.mx.Unlock() + return &clientCookieMiddleware{jar: maps.Clone(cc.jar)} +} + func (cc *clientCookieMiddleware) StartCall(ctx context.Context) context.Context { cc.mx.Lock() defer cc.mx.Unlock() diff --git a/go/arrow/flight/cookie_middleware_test.go b/go/arrow/flight/cookie_middleware_test.go index 0adf4927652d4..4007d056b2c99 100644 --- a/go/arrow/flight/cookie_middleware_test.go +++ b/go/arrow/flight/cookie_middleware_test.go @@ -239,3 +239,63 @@ func TestCookieExpiration(t *testing.T) { cookieMiddleware.expectedCookies = map[string]string{} makeReq(client, t) } + +func TestCookiesClone(t *testing.T) { + cookieMiddleware := &serverAddCookieMiddleware{} + + s := flight.NewServerWithMiddleware([]flight.ServerMiddleware{ + flight.CreateServerMiddleware(cookieMiddleware), + }) + s.Init("localhost:0") + f := &flightServer{} + s.RegisterFlightService(f) + + go s.Serve() + defer s.Shutdown() + + makeReq := func(c flight.Client, t *testing.T) { + flightStream, err := c.ListFlights(context.Background(), &flight.Criteria{}) + assert.NoError(t, err) + + for { + _, err := flightStream.Recv() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + assert.NoError(t, err) + } + } + } + + credsOpt := grpc.WithTransportCredentials(insecure.NewCredentials()) + cookies := flight.NewCookieMiddleware() + client1, err := flight.NewClientWithMiddleware(s.Addr().String(), nil, + []flight.ClientMiddleware{flight.CreateClientMiddleware(cookies)}, credsOpt) + require.NoError(t, err) + defer client1.Close() + + // set cookies + cookieMiddleware.cookies = []*http.Cookie{ + {Name: "foo", Value: "bar"}, + {Name: "foo2", Value: "bar2", MaxAge: 1}, + } + makeReq(client1, t) + + // validate set + cookieMiddleware.expectedCookies = map[string]string{ + "foo": "bar", "foo2": "bar2", + } + makeReq(client1, t) + + client2, err := flight.NewClientWithMiddleware(s.Addr().String(), nil, + []flight.ClientMiddleware{flight.CreateClientMiddleware(cookies.Clone())}, credsOpt) + require.NoError(t, err) + defer client2.Close() + + // validate clone worked + cookieMiddleware.expectedCookies = map[string]string{ + "foo": "bar", "foo2": "bar2", + } + makeReq(client2, t) +} From fc3278ffb78e6f4f79cd20160bf911efa5a09ba1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 06:01:22 +0900 Subject: [PATCH 02/74] MINOR: [Java] Bump org.immutables:value from 2.8.2 to 2.10.0 in /java (#39831) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.immutables:value](https://github.com/immutables/immutables) from 2.8.2 to 2.10.0.
Release notes

Sourced from org.immutables:value's releases.

2.10.0

JakartaEE support

Style flag jakarta = true. Mainly a package change for annotations and types like Validator

Miscellaneous

  • JDK9 unmodifiable collections for List, Set, Map, style flag jdk9Collections = true
  • Suppress from method, style flag from = ""
  • Non-strict modifiables allows reading unset attributes, style flag strictModifiables = false
  • Fixes in nested type_use annotations.
  • Performance: better initial capacity for collections
  • Refinements and fixes to Criteria modules
  • Plus many other refinements and maintenance; see below

Workarounds for Gradle

  • imports for not-yet-generated types: add options.sourcepath
  • disable incremental compilation (options.incremental); may also help in some complex cases

What's Changed (since last year's release)

New Contributors

Full Changelog: https://github.com/immutables/immutables/compare/2.9.3...2.10.0

2.9.3

Maintenance release

What's Changed

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.immutables:value&package-manager=maven&previous-version=2.8.2&new-version=2.10.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@ dependabot rebase` will rebase this PR
  • `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@ dependabot merge` will merge this PR after your CI passes on it
  • `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
  • `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
  • `@ dependabot reopen` will reopen this PR if it is closed
  • `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
  • `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/pom.xml b/java/pom.xml index 3951f1c1bc8ed..2423e2d495d11 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -438,7 +438,7 @@ org.immutables value - 2.8.2 + 2.10.0 @@ -653,7 +653,7 @@ org.immutables value - 2.8.2 + 2.10.0 provided From 7fd59739fddf4b614c68d57e24068542b4cf2884 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 06:01:42 +0900 Subject: [PATCH 03/74] MINOR: [Java] Bump org.apache.maven.plugins:maven-gpg-plugin from 1.5 to 3.1.0 in /java (#39832) Bumps [org.apache.maven.plugins:maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 1.5 to 3.1.0.
Commits
  • 699e2ad [maven-release-plugin] prepare release maven-gpg-plugin-3.1.0
  • f314f8e [MGPG-97] use gpgverify plugin to check dependencies signatures
  • bad6b57 [MGPG-96] add INFO message
  • 0498a82 [MGPG-95] don't GPG-sign .sigstore signatures
  • 09b5be9 Auto-link MGPG Jira
  • 1e0472f extract FilesCollector
  • af9ccfd [MGPG-94] Ignore reformatting
  • 5e51734 [MGPG-94] Integration tests - convert and reformat bsh to groovy
  • 955ea0e [MGPG-94] Reformat code
  • e160f43 [MGPG-94] Bump maven-plugins from 36 to 39
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-gpg-plugin&package-manager=maven&previous-version=1.5&new-version=3.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@ dependabot rebase` will rebase this PR
  • `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@ dependabot merge` will merge this PR after your CI passes on it
  • `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
  • `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
  • `@ dependabot reopen` will reopen this PR if it is closed
  • `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
  • `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/gandiva/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index d0290b6814ed5..6337efcf7e348 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -96,7 +96,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.5 + 3.1.0 sign-artifacts From 3b8b700348f5d73fa4cfdb2780b0bde5d83a7f22 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 06:02:06 +0900 Subject: [PATCH 04/74] MINOR: [Java] Bump org.apache.hadoop:hadoop-common from 2.7.1 to 3.3.6 in /java (#39833) Bumps org.apache.hadoop:hadoop-common from 2.7.1 to 3.3.6. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.hadoop:hadoop-common&package-manager=maven&previous-version=2.7.1&new-version=3.3.6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@ dependabot rebase` will rebase this PR
  • `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@ dependabot merge` will merge this PR after your CI passes on it
  • `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
  • `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
  • `@ dependabot reopen` will reopen this PR if it is closed
  • `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
  • `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/adapter/orc/pom.xml | 2 +- java/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index 265a9a71b80e2..79e51470a426e 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -75,7 +75,7 @@ org.apache.hadoop hadoop-common - 3.3.3 + 3.3.6 test diff --git a/java/pom.xml b/java/pom.xml index 2423e2d495d11..3947f76cae849 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -37,7 +37,7 @@ 1.60.0 3.23.1 2.16.0 - 2.7.1 + 3.3.6 23.5.26 1.11.3 From 91d65b79f71a1be6a0bf7426e0ee91dd2e65a852 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 06:02:31 +0900 Subject: [PATCH 05/74] MINOR: [Java] Bump io.netty:netty-bom from 4.1.105.Final to 4.1.106.Final in /java (#39834) Bumps [io.netty:netty-bom](https://github.com/netty/netty) from 4.1.105.Final to 4.1.106.Final.
Commits
  • 9d0ec7b [maven-release-plugin] prepare release netty-4.1.106.Final
  • e2859f4 Short-circuit ByteBuf::release (#13782)
  • d9ca50d Prevent sharing the index of the continuation frame header ByteBuf. (#13786)
  • 0e7c27c DnsNameResolver: Fail query if id space is exhausted (#13784)
  • b194741 [maven-release-plugin] prepare for next development iteration
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=io.netty:netty-bom&package-manager=maven&previous-version=4.1.105.Final&new-version=4.1.106.Final)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@ dependabot rebase` will rebase this PR
  • `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@ dependabot merge` will merge this PR after your CI passes on it
  • `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
  • `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
  • `@ dependabot reopen` will reopen this PR if it is closed
  • `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
  • `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 3947f76cae849..4888f833df096 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -33,7 +33,7 @@ 5.10.1 2.0.11 33.0.0-jre - 4.1.105.Final + 4.1.106.Final 1.60.0 3.23.1 2.16.0 From 63498c2891c757aca016305c61e4a0ba82faed2b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 06:02:55 +0900 Subject: [PATCH 06/74] MINOR: [Java] Bump org.apache.maven.plugins:maven-enforcer-plugin from 3.0.0-M2 to 3.4.1 in /java (#39835) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.apache.maven.plugins:maven-enforcer-plugin](https://github.com/apache/maven-enforcer) from 3.0.0-M2 to 3.4.1.
Release notes

Sourced from org.apache.maven.plugins:maven-enforcer-plugin's releases.

3.4.1

🐛 Bug Fixes

👻 Maintenance

3.4.0

🚀 New features and improvements

🐛 Bug Fixes

📦 Dependency updates

📝 Documentation updates

  • Clarify availability of AbstractEnforcerRule (#278) @​kwin

👻 Maintenance

  • Bump org.junit:junit-bom from 5.9.3 to 5.10.0 (#280) @​dependabot
  • Bump snappy-java from 1.1.8.3 to 1.1.10.1 in /maven-enforcer-plugin/src/it/projects/dependency-convergence_transitive_provided/module1 (#273) @​dependabot
  • [MNG-6829] - Replace StringUtils#isEmpty(String) and #isNotEmpty(String) (#272) @​timtebeek

3.3.0

... (truncated)

Commits
  • d8a21ee [maven-release-plugin] prepare release enforcer-3.4.1
  • 66250c0 [MENFORCER-491] Fix plugin documentation generation
  • 5d32e6c [MENFORCER-490] Declare maven-enforcer-plugin dependencies (#285)
  • d258109 [MENFORCER-490] Declare org.eclipse.sisu.plexus dependencies (#283)
  • 2aa71e7 [MENFORCER-490] Declare maven-enforcer-extension dependencies (#284)
  • d4ec8e1 [MENFORCER-490] Declare maven-enforcer-extension dependencies (#282)
  • b35e4a0 [maven-release-plugin] prepare for next development iteration
  • 3d365f7 [maven-release-plugin] prepare release enforcer-3.4.0
  • 5feb93a [MENFORCER-489] Bump commons-lang3 from 3.12.0 to 3.13.0
  • 8f2de47 Bump org.junit:junit-bom from 5.9.3 to 5.10.0
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-enforcer-plugin&package-manager=maven&previous-version=3.0.0-M2&new-version=3.4.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@ dependabot rebase` will rebase this PR
  • `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@ dependabot merge` will merge this PR after your CI passes on it
  • `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
  • `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
  • `@ dependabot reopen` will reopen this PR if it is closed
  • `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
  • `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 4888f833df096..3e595648ed085 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -445,7 +445,7 @@
maven-enforcer-plugin - 3.0.0-M2 + 3.4.1 org.apache.maven.plugins From b778ace6622614035acc1bbe17b06bdc8141d9fe Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 30 Jan 2024 11:54:57 +0900 Subject: [PATCH 07/74] GH-39841: [GLib] Add support for GLib 2.56 again (#39842) ### Rationale for this change It's still used in CentOS 7 and AlmaLinux 8. ### What changes are included in this PR? Don't use `g_time_zone_get_identifier()` with GLib < 2.58. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * Closes: #39841 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- c_glib/arrow-glib/basic-data-type.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index 0de9466eee456..98b2c92104507 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -1212,7 +1212,8 @@ garrow_timestamp_data_type_class_init(GArrowTimestampDataTypeClass *klass) /** * garrow_timestamp_data_type_new: * @unit: The unit of the timestamp data. - * @time_zone: (nullable): The time zone of the timestamp data. + * @time_zone: (nullable): The time zone of the timestamp data. If based GLib + * is less than 2.58, this is ignored. * * Returns: A newly created the number of * seconds/milliseconds/microseconds/nanoseconds since UNIX epoch in @@ -1226,9 +1227,11 @@ garrow_timestamp_data_type_new(GArrowTimeUnit unit, { auto arrow_unit = garrow_time_unit_to_raw(unit); std::string arrow_timezone; +#if GLIB_CHECK_VERSION(2, 58, 0) if (time_zone) { arrow_timezone = g_time_zone_get_identifier(time_zone); } +#endif auto arrow_data_type = arrow::timestamp(arrow_unit, arrow_timezone); auto data_type = GARROW_TIMESTAMP_DATA_TYPE(g_object_new(GARROW_TYPE_TIMESTAMP_DATA_TYPE, From c6ab28677ddf22799f3db277137708ac5b070acd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 30 Jan 2024 09:16:53 +0100 Subject: [PATCH 08/74] GH-39640: [Docs] Pin pydata-sphinx-theme to 0.14.* (#39758) ### Rationale for this change Fixing the pinning syntax so we get the latest 0.14.x version (which is currently 0.14.4) * Closes: #39640 Authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- ci/conda_env_sphinx.txt | 2 +- docs/requirements.txt | 2 +- docs/source/python/api/compute.rst | 2 +- docs/source/python/compute.rst | 4 ++-- docs/source/python/pandas.rst | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt index d0f494d2e085d..0e50875fc1ef8 100644 --- a/ci/conda_env_sphinx.txt +++ b/ci/conda_env_sphinx.txt @@ -20,7 +20,7 @@ breathe doxygen ipython numpydoc -pydata-sphinx-theme=0.14.1 +pydata-sphinx-theme=0.14 sphinx-autobuild sphinx-design sphinx-copybutton diff --git a/docs/requirements.txt b/docs/requirements.txt index aee2eb662c06b..5d6fec7ddf72e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,7 +5,7 @@ breathe ipython numpydoc -pydata-sphinx-theme==0.14.1 +pydata-sphinx-theme~=0.14 sphinx-autobuild sphinx-design sphinx-copybutton diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index b879643017a90..928c607d139ce 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -590,4 +590,4 @@ User-Defined Functions :toctree: ../generated/ register_scalar_function - ScalarUdfContext + UdfContext diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst index e8a5b613c6099..c02059a4f8faa 
100644 --- a/docs/source/python/compute.rst +++ b/docs/source/python/compute.rst @@ -445,9 +445,9 @@ output type need to be defined. Using :func:`pyarrow.compute.register_scalar_fun The implementation of a user-defined function always takes a first *context* parameter (named ``ctx`` in the example above) which is an instance of -:class:`pyarrow.compute.ScalarUdfContext`. +:class:`pyarrow.compute.UdfContext`. This context exposes several useful attributes, particularly a -:attr:`~pyarrow.compute.ScalarUdfContext.memory_pool` to be used for +:attr:`~pyarrow.compute.UdfContext.memory_pool` to be used for allocations in the context of the user-defined function. You can call a user-defined function directly using :func:`pyarrow.compute.call_function`: diff --git a/docs/source/python/pandas.rst b/docs/source/python/pandas.rst index fda90c4f2a58c..23a4b73bd0965 100644 --- a/docs/source/python/pandas.rst +++ b/docs/source/python/pandas.rst @@ -197,7 +197,7 @@ use the ``datetime64[ns]`` type in Pandas and are converted to an Arrow .. ipython:: python - df = pd.DataFrame({"datetime": pd.date_range("2020-01-01T00:00:00Z", freq="H", periods=3)}) + df = pd.DataFrame({"datetime": pd.date_range("2020-01-01T00:00:00Z", freq="h", periods=3)}) df.dtypes df From 787afa1594586d2d556d21471647f9cd2c55b18f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 30 Jan 2024 12:54:19 +0100 Subject: [PATCH 09/74] GH-39651: [Python] Basic pyarrow bindings for Binary/StringView classes (#39652) ### Rationale for this change First step for https://github.com/apache/arrow/issues/39633: exposing the Array, DataType and Scalar classes for BinaryView and StringView, such that those can already be represented in pyarrow. (I exposed a variant of StringBuilder as well, just for now to be able to create test data) * Closes: #39651 Authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- docs/source/python/api/arrays.rst | 4 ++ docs/source/python/api/datatypes.rst | 4 ++ python/pyarrow/__init__.py | 7 ++- python/pyarrow/array.pxi | 14 +++++ python/pyarrow/builder.pxi | 66 ++++++++++++++++++++++ python/pyarrow/includes/libarrow.pxd | 9 +++ python/pyarrow/lib.pxd | 8 +++ python/pyarrow/lib.pyx | 2 + python/pyarrow/scalar.pxi | 10 ++++ python/pyarrow/src/arrow/python/helpers.cc | 2 + python/pyarrow/tests/test_builder.py | 21 ++++++- python/pyarrow/tests/test_misc.py | 4 ++ python/pyarrow/tests/test_scalars.py | 28 ++++++++- python/pyarrow/tests/test_types.py | 8 +++ python/pyarrow/types.pxi | 32 +++++++++++ python/pyarrow/types.py | 10 ++++ 16 files changed, 223 insertions(+), 6 deletions(-) diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index 73b5e063ff1a0..b858862dcff01 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -63,6 +63,8 @@ may expose data type-specific methods or properties. FixedSizeBinaryArray LargeBinaryArray LargeStringArray + BinaryViewArray, + StringViewArray, Time32Array Time64Array Date32Array @@ -119,6 +121,8 @@ classes may expose data type-specific methods or properties. FixedSizeBinaryScalar LargeBinaryScalar LargeStringScalar + BinaryViewScalar + StringViewScalar Time32Scalar Time64Scalar Date32Scalar diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst index 4066ef314234d..642c243b21af0 100644 --- a/docs/source/python/api/datatypes.rst +++ b/docs/source/python/api/datatypes.rst @@ -55,6 +55,8 @@ These should be used to create Arrow data types and schemas. 
large_binary large_string large_utf8 + binary_view + string_view decimal128 list_ large_list @@ -168,6 +170,8 @@ represents a given data type (such as ``int32``) or general category is_large_binary is_large_unicode is_large_string + is_binary_view + is_string_view is_fixed_size_binary is_map is_dictionary diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 9da94885ec6b2..4dbd1258d3cea 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -163,7 +163,7 @@ def print_entry(label, value): time32, time64, timestamp, date32, date64, duration, month_day_nano_interval, float16, float32, float64, - binary, string, utf8, + binary, string, utf8, binary_view, string_view, large_binary, large_string, large_utf8, decimal128, decimal256, list_, large_list, map_, struct, @@ -205,6 +205,7 @@ def print_entry(label, value): FixedSizeListArray, UnionArray, BinaryArray, StringArray, LargeBinaryArray, LargeStringArray, + BinaryViewArray, StringViewArray, FixedSizeBinaryArray, DictionaryArray, Date32Array, Date64Array, TimestampArray, @@ -223,8 +224,8 @@ def print_entry(label, value): Time32Scalar, Time64Scalar, TimestampScalar, DurationScalar, MonthDayNanoIntervalScalar, - BinaryScalar, LargeBinaryScalar, - StringScalar, LargeStringScalar, + BinaryScalar, LargeBinaryScalar, BinaryViewScalar, + StringScalar, LargeStringScalar, StringViewScalar, FixedSizeBinaryScalar, DictionaryScalar, MapScalar, StructScalar, UnionScalar, RunEndEncodedScalar, ExtensionScalar) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 1416f5f4346d9..1029f3a629817 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2942,6 +2942,12 @@ cdef class LargeStringArray(Array): null_count, offset) +cdef class StringViewArray(Array): + """ + Concrete class for Arrow arrays of string (or utf8) view data type. + """ + + cdef class BinaryArray(Array): """ Concrete class for Arrow arrays of variable-sized binary data type. @@ -2968,6 +2974,12 @@ cdef class LargeBinaryArray(Array): return ( self.ap).total_values_length() +cdef class BinaryViewArray(Array): + """ + Concrete class for Arrow arrays of variable-sized binary view data type. + """ + + cdef class DictionaryArray(Array): """ Concrete class for dictionary-encoded Arrow arrays. @@ -3669,6 +3681,8 @@ cdef dict _array_classes = { _Type_STRING: StringArray, _Type_LARGE_BINARY: LargeBinaryArray, _Type_LARGE_STRING: LargeStringArray, + _Type_BINARY_VIEW: BinaryViewArray, + _Type_STRING_VIEW: StringViewArray, _Type_DICTIONARY: DictionaryArray, _Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray, _Type_DECIMAL128: Decimal128Array, diff --git a/python/pyarrow/builder.pxi b/python/pyarrow/builder.pxi index a34ea5412e14a..2af39e2c589e6 100644 --- a/python/pyarrow/builder.pxi +++ b/python/pyarrow/builder.pxi @@ -80,3 +80,69 @@ cdef class StringBuilder(_Weakrefable): def __len__(self): return self.builder.get().length() + + +cdef class StringViewBuilder(_Weakrefable): + """ + Builder class for UTF8 string views. + + This class exposes facilities for incrementally adding string values and + building the null bitmap for a pyarrow.Array (type='string_view'). + """ + cdef: + unique_ptr[CStringViewBuilder] builder + + def __cinit__(self, MemoryPool memory_pool=None): + cdef CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool) + self.builder.reset(new CStringViewBuilder(pool)) + + def append(self, value): + """ + Append a single value to the builder. 
+ + The value can either be a string/bytes object or a null value + (np.nan or None). + + Parameters + ---------- + value : string/bytes or np.nan/None + The value to append to the string array builder. + """ + if value is None or value is np.nan: + self.builder.get().AppendNull() + elif isinstance(value, (bytes, str)): + self.builder.get().Append(tobytes(value)) + else: + raise TypeError('StringViewBuilder only accepts string objects') + + def append_values(self, values): + """ + Append all the values from an iterable. + + Parameters + ---------- + values : iterable of string/bytes or np.nan/None values + The values to append to the string array builder. + """ + for value in values: + self.append(value) + + def finish(self): + """ + Return result of builder as an Array object; also resets the builder. + + Returns + ------- + array : pyarrow.Array + """ + cdef shared_ptr[CArray] out + with nogil: + self.builder.get().Finish(&out) + return pyarrow_wrap_array(out) + + @property + def null_count(self): + return self.builder.get().null_count() + + def __len__(self): + return self.builder.get().length() diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 74e92594b04e5..d92f09da779b6 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -126,6 +126,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: _Type_LARGE_BINARY" arrow::Type::LARGE_BINARY" _Type_LARGE_STRING" arrow::Type::LARGE_STRING" _Type_FIXED_SIZE_BINARY" arrow::Type::FIXED_SIZE_BINARY" + _Type_BINARY_VIEW" arrow::Type::BINARY_VIEW" + _Type_STRING_VIEW" arrow::Type::STRING_VIEW" _Type_LIST" arrow::Type::LIST" _Type_LARGE_LIST" arrow::Type::LARGE_LIST" @@ -1295,7 +1297,14 @@ cdef extern from "arrow/builder.h" namespace "arrow" nogil: cdef cppclass CStringBuilder" arrow::StringBuilder"(CBinaryBuilder): CStringBuilder(CMemoryPool* pool) + CStatus Append(const c_string& value) + + cdef cppclass CBinaryViewBuilder" arrow::BinaryViewBuilder"(CArrayBuilder): + CBinaryViewBuilder(shared_ptr[CDataType], CMemoryPool* pool) + CStatus Append(const char* value, int32_t length) + cdef cppclass CStringViewBuilder" arrow::StringViewBuilder"(CBinaryViewBuilder): + CStringViewBuilder(CMemoryPool* pool) CStatus Append(const c_string& value) cdef cppclass CTimestampBuilder "arrow::TimestampBuilder"(CArrayBuilder): diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index 58ec34addbc0a..c1104864066e9 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -445,6 +445,14 @@ cdef class BinaryArray(Array): pass +cdef class StringViewArray(Array): + pass + + +cdef class BinaryViewArray(Array): + pass + + cdef class DictionaryArray(Array): cdef: object _indices, _dictionary diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 29a0bed55949c..b0368b67f790e 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -106,6 +106,8 @@ Type_STRING = _Type_STRING Type_LARGE_BINARY = _Type_LARGE_BINARY Type_LARGE_STRING = _Type_LARGE_STRING Type_FIXED_SIZE_BINARY = _Type_FIXED_SIZE_BINARY +Type_BINARY_VIEW = _Type_BINARY_VIEW +Type_STRING_VIEW = _Type_STRING_VIEW Type_LIST = _Type_LIST Type_LARGE_LIST = _Type_LARGE_LIST Type_MAP = _Type_MAP diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 9a66dc81226d4..2772acf81861c 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -665,6 +665,14 @@ cdef class LargeStringScalar(StringScalar): pass +cdef class BinaryViewScalar(BinaryScalar): + pass + + 
+cdef class StringViewScalar(StringScalar): + pass + + cdef class ListScalar(Scalar): """ Concrete class for list-like scalars. @@ -1051,8 +1059,10 @@ cdef dict _scalar_classes = { _Type_BINARY: BinaryScalar, _Type_LARGE_BINARY: LargeBinaryScalar, _Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar, + _Type_BINARY_VIEW: BinaryViewScalar, _Type_STRING: StringScalar, _Type_LARGE_STRING: LargeStringScalar, + _Type_STRING_VIEW: StringViewScalar, _Type_LIST: ListScalar, _Type_LARGE_LIST: LargeListScalar, _Type_FIXED_SIZE_LIST: FixedSizeListScalar, diff --git a/python/pyarrow/src/arrow/python/helpers.cc b/python/pyarrow/src/arrow/python/helpers.cc index c266abc169d49..2c86c86a919be 100644 --- a/python/pyarrow/src/arrow/python/helpers.cc +++ b/python/pyarrow/src/arrow/python/helpers.cc @@ -63,6 +63,8 @@ std::shared_ptr GetPrimitiveType(Type::type type) { GET_PRIMITIVE_TYPE(STRING, utf8); GET_PRIMITIVE_TYPE(LARGE_BINARY, large_binary); GET_PRIMITIVE_TYPE(LARGE_STRING, large_utf8); + GET_PRIMITIVE_TYPE(BINARY_VIEW, binary_view); + GET_PRIMITIVE_TYPE(STRING_VIEW, utf8_view); GET_PRIMITIVE_TYPE(INTERVAL_MONTH_DAY_NANO, month_day_nano_interval); default: return nullptr; diff --git a/python/pyarrow/tests/test_builder.py b/python/pyarrow/tests/test_builder.py index 50d801026b7d8..abc8a0013df37 100644 --- a/python/pyarrow/tests/test_builder.py +++ b/python/pyarrow/tests/test_builder.py @@ -20,7 +20,7 @@ import numpy as np import pyarrow as pa -from pyarrow.lib import StringBuilder +from pyarrow.lib import StringBuilder, StringViewBuilder def test_weakref(): @@ -65,3 +65,22 @@ def test_string_builder_append_after_finish(): sbuilder.append("No effect") expected = [None, None, "text", None, "other text"] assert arr.to_pylist() == expected + + +def test_string_view_builder(): + builder = StringViewBuilder() + builder.append(b"a byte string") + builder.append("a string") + builder.append("a longer not-inlined string") + builder.append(np.nan) + builder.append_values([None, "text"]) + assert len(builder) == 6 + assert builder.null_count == 2 + arr = builder.finish() + assert isinstance(arr, pa.Array) + assert arr.null_count == 2 + assert arr.type == 'string_view' + expected = [ + "a byte string", "a string", "a longer not-inlined string", None, None, "text" + ] + assert arr.to_pylist() == expected diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 8b8c50882b749..8cec8783280dd 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -185,6 +185,8 @@ def test_set_timezone_db_path_non_windows(): pa.UnionArray, pa.BinaryArray, pa.StringArray, + pa.BinaryViewArray, + pa.StringViewArray, pa.FixedSizeBinaryArray, pa.DictionaryArray, pa.Date32Array, @@ -221,6 +223,8 @@ def test_set_timezone_db_path_non_windows(): pa.StringScalar, pa.BinaryScalar, pa.FixedSizeBinaryScalar, + pa.BinaryViewScalar, + pa.StringViewScalar, pa.ListScalar, pa.LargeListScalar, pa.MapScalar, diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 74dee59558239..4a239b23d5676 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -51,6 +51,9 @@ (b"bytes", None, pa.BinaryScalar), ("largestring", pa.large_string(), pa.LargeStringScalar), (b"largebytes", pa.large_binary(), pa.LargeBinaryScalar), + # TODO(GH-39633) pa.scalar(..) 
requires python->arrow conversion to be implemented + # ("string_view", pa.string_view(), pa.StringViewScalar), + # (b"bytes_view", pa.binary_view(), pa.BinaryViewScalar), (b"abc", pa.binary(3), pa.FixedSizeBinaryScalar), ([1, 2, 3], None, pa.ListScalar), ([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar), @@ -488,7 +491,8 @@ def test_month_day_nano_interval(): @pytest.mark.parametrize('value', ['foo', 'mañana']) @pytest.mark.parametrize(('ty', 'scalar_typ'), [ (pa.string(), pa.StringScalar), - (pa.large_string(), pa.LargeStringScalar) + (pa.large_string(), pa.LargeStringScalar), + # (pa.string_view(), pa.StringViewScalar), ]) def test_string(value, ty, scalar_typ): s = pa.scalar(value, type=ty) @@ -503,10 +507,30 @@ def test_string(value, ty, scalar_typ): assert buf.to_pybytes() == value.encode() +@pytest.mark.parametrize('value', ['foo', 'mañana']) +def test_string_view(value): + # TODO: replace with normal scalar construction + builder = pa.lib.StringViewBuilder() + builder.append(value) + arr = builder.finish() + + s = arr[0] + assert isinstance(s, pa.StringViewScalar) + assert s.as_py() == value + assert s.as_py() != 'something' + assert repr(value) in repr(s) + assert str(s) == str(value) + + buf = s.as_buffer() + assert isinstance(buf, pa.Buffer) + assert buf.to_pybytes() == value.encode() + + @pytest.mark.parametrize('value', [b'foo', b'bar']) @pytest.mark.parametrize(('ty', 'scalar_typ'), [ (pa.binary(), pa.BinaryScalar), - (pa.large_binary(), pa.LargeBinaryScalar) + (pa.large_binary(), pa.LargeBinaryScalar), + # (pa.binary_view(), pa.BinaryViewScalar), ]) def test_binary(value, ty, scalar_typ): s = pa.scalar(value, type=ty) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index c8a52c6b626c2..a5ab3128dc874 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -61,6 +61,8 @@ def get_many_types(): pa.binary(10), pa.large_string(), pa.large_binary(), + pa.string_view(), + pa.binary_view(), pa.list_(pa.int32()), pa.list_(pa.int32(), 2), pa.large_list(pa.uint16()), @@ -244,6 +246,12 @@ def test_is_binary_string(): assert types.is_fixed_size_binary(pa.binary(5)) assert not types.is_fixed_size_binary(pa.binary()) + assert types.is_string_view(pa.string_view()) + assert not types.is_string_view(pa.string()) + assert types.is_binary_view(pa.binary_view()) + assert not types.is_binary_view(pa.binary()) + assert not types.is_binary_view(pa.string_view()) + def test_is_temporal_date_time_timestamp(): date_types = [pa.date32(), pa.date64()] diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index b6dc53d633543..ce3736b5af847 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -4375,6 +4375,36 @@ def large_utf8(): return large_string() +def binary_view(): + """ + Create a variable-length binary view type. + + Examples + -------- + Create an instance of a string type: + + >>> import pyarrow as pa + >>> pa.binary_view() + DataType(binary_view) + """ + return primitive_type(_Type_BINARY_VIEW) + + +def string_view(): + """ + Create UTF8 variable-length string view type. + + Examples + -------- + Create an instance of a string type: + + >>> import pyarrow as pa + >>> pa.string_view() + DataType(string_view) + """ + return primitive_type(_Type_STRING_VIEW) + + def list_(value_type, int list_size=-1): """ Create ListType instance from child data type or field. 
@@ -4991,6 +5021,8 @@ cdef dict _type_aliases = { 'large_str': large_string, 'large_utf8': large_string, 'large_binary': large_binary, + 'binary_view': binary_view, + 'string_view': string_view, 'date32': date32, 'date64': date64, 'date32[day]': date32, diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py index 5d7dbe4b451b9..32398dac9c5f5 100644 --- a/python/pyarrow/types.py +++ b/python/pyarrow/types.py @@ -243,6 +243,16 @@ def is_fixed_size_binary(t): return t.id == lib.Type_FIXED_SIZE_BINARY +@doc(is_null, datatype="variable-length binary view") +def is_binary_view(t): + return t.id == lib.Type_BINARY_VIEW + + +@doc(is_null, datatype="variable-length string (utf-8) view") +def is_string_view(t): + return t.id == lib.Type_STRING_VIEW + + @doc(is_null, datatype="date") def is_date(t): return t.id in _DATE_TYPES From 749f936fc77b83d3c0ec5642c16561b3afa5dfa7 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Wed, 31 Jan 2024 08:24:11 -0800 Subject: [PATCH 10/74] MINOR: [CI] update weston codeowners (#39867) ### Rationale for this change Currently I am unable to keep up with my Github inbox and thus respond to very little. I am trying to balance this. ### What changes are included in this PR? Reduce the scope of files that will trigger automated review. ### Are these changes tested? N/A ### Are there any user-facing changes? No Authored-by: Weston Pace Signed-off-by: Weston Pace --- .github/CODEOWNERS | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 41a075b1c0bcb..e7e544c2b0e62 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -30,15 +30,10 @@ # /cpp/ /cpp/src/arrow/acero @westonpace /cpp/src/arrow/adapters/orc @wgtmac -/cpp/src/arrow/dataset @westonpace /cpp/src/arrow/engine @westonpace /cpp/src/arrow/flight/ @lidavidm -/cpp/src/arrow/util/async* @westonpace -/cpp/src/arrow/util/future* @westonpace -/cpp/src/arrow/util/thread* @westonpace /cpp/src/parquet @wgtmac -/cpp/src/skyhook @westonpace -/csharp/ @westonpace +/csharp/ @curthagenlocher /go/ @zeroshade /java/ @lidavidm /js/ @domoritz @trxcllnt From 2a87693134135a8af2ae2b6df41980176431b1c0 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Wed, 31 Jan 2024 13:38:54 -0500 Subject: [PATCH 11/74] GH-39680: [Java] enable half float support on Java module (#39681) ### Rationale for this change - To enable half float support on Java module. ### What changes are included in this PR? - [x] Add initial Float16 type support - [x] Unit test - [x] Integration test - [x] Documentation ### Are these changes tested? Yes. ### Are there any user-facing changes? 
No * Closes: #39680 Authored-by: david dali susanibar arce Signed-off-by: David Li --- docs/source/status.rst | 9 +- .../apache/arrow/dataset/TestAllTypes.java | 6 +- .../org/apache/arrow/memory/util/Float16.java | 271 +++++++++++ .../org/apache/arrow/memory/TestArrowBuf.java | 11 + .../main/codegen/data/ValueVectorTypes.tdd | 10 + .../main/codegen/templates/UnionReader.java | 6 +- .../org/apache/arrow/vector/Float2Vector.java | 434 ++++++++++++++++++ .../org/apache/arrow/vector/types/Types.java | 16 +- .../apache/arrow/vector/TestValueVector.java | 198 ++++++++ 9 files changed, 953 insertions(+), 8 deletions(-) create mode 100644 java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java diff --git a/docs/source/status.rst b/docs/source/status.rst index 03a87012342c2..11dd9c2c2965c 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -40,7 +40,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Float16 | ✓ (1) | | ✓ | ✓ | ✓ (2)| ✓ | ✓ | | +| Float16 | ✓ (1) | ✓ (2) | ✓ | ✓ | ✓ (3)| ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -104,7 +104,7 @@ Data Types | Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | Swift | | (special) | | | | | | | | | +===================+=======+=======+=======+============+=======+=======+=======+=======+ -| Dictionary | ✓ | ✓ (3) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | | +| Dictionary | ✓ | ✓ (4) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -114,8 +114,9 @@ Data Types Notes: * \(1) Casting to/from Float16 in C++ is not supported. -* \(2) Float16 support in C# is only available when targeting .NET 6+. -* \(3) Nested dictionaries not supported +* \(2) Casting to/from Float16 in Java is not supported. +* \(3) Float16 support in C# is only available when targeting .NET 6+. +* \(4) Nested dictionaries not supported .. seealso:: The :ref:`format_columnar` specification. 
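To make the new half-float support concrete, here is a minimal usage sketch. `Float2Vector`, `Float16.toFloat16` and `Float16.toFloat` come from the diffs below; the `(name, allocator)` constructor and the `allocateNew`/`get`/`setValueCount` calls are assumed to follow the same pattern as the existing fixed-width vectors such as `Float4Vector`.

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.memory.util.Float16;
import org.apache.arrow.vector.Float2Vector;

public class HalfFloatExample {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator();
         // Assumed constructor, mirroring Float4Vector(String name, BufferAllocator allocator).
         Float2Vector vector = new Float2Vector("float16", allocator)) {
      vector.allocateNew(2);
      // toFloat16 narrows a Java float to the IEEE 754 binary16 bit pattern held in a short,
      // exactly as in the TestAllTypes change in this patch.
      vector.set(0, Float16.toFloat16(+32.875f));
      vector.setNull(1);
      vector.setValueCount(2);
      // Reading back widens the stored short to a regular float.
      float value = Float16.toFloat(vector.get(0));
      System.out.println(value); // 32.875
    }
  }
}

Half-float values cross the vector API as raw shorts, so widening and narrowing are explicit calls to `Float16` rather than something the vector does implicitly.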
diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java index 13b247452348d..6d33cf057ed3a 100644 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java +++ b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java @@ -32,6 +32,7 @@ import org.apache.arrow.dataset.file.DatasetFileWriter; import org.apache.arrow.dataset.file.FileFormat; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.Float16; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateMilliVector; @@ -39,6 +40,7 @@ import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DurationVector; import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.Float2Vector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; @@ -89,7 +91,6 @@ public class TestAllTypes extends TestDataset { private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { // Notes: - // - Float16 is not supported by Java. // - IntervalMonthDayNano is not supported by Parquet. // - Map (GH-38250) and SparseUnion are resulting in serialization errors when writing with the Dataset API. // "Unhandled type for Arrow to Parquet schema conversion" errors: IntervalDay, IntervalYear, DenseUnion @@ -109,6 +110,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { Field.nullablePrimitive("uint16", new ArrowType.Int(16, false)), Field.nullablePrimitive("uint32", new ArrowType.Int(32, false)), Field.nullablePrimitive("uint64", new ArrowType.Int(64, false)), + Field.nullablePrimitive("float16", new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)), Field.nullablePrimitive("float32", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Field.nullablePrimitive("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Field.nullablePrimitive("utf8", ArrowType.Utf8.INSTANCE), @@ -148,6 +150,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { root.getVector("uint16").setNull(0); root.getVector("uint32").setNull(0); root.getVector("uint64").setNull(0); + root.getVector("float16").setNull(0); root.getVector("float32").setNull(0); root.getVector("float64").setNull(0); root.getVector("utf8").setNull(0); @@ -180,6 +183,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { ((UInt2Vector) root.getVector("uint16")).set(1, 1); ((UInt4Vector) root.getVector("uint32")).set(1, 1); ((UInt8Vector) root.getVector("uint64")).set(1, 1); + ((Float2Vector) root.getVector("float16")).set(1, Float16.toFloat16(+32.875f)); ((Float4Vector) root.getVector("float32")).set(1, 1.0f); ((Float8Vector) root.getVector("float64")).set(1, 1.0); ((VarCharVector) root.getVector("utf8")).set(1, new Text("a")); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java new file mode 100644 index 0000000000000..8040158fd090e --- /dev/null +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.memory.util; + + +import org.apache.arrow.util.VisibleForTesting; + +/** + * Lifted from Apache Parquet MR project: + * https://github.com/apache/parquet-mr/blob/e87b80308869b77f914fcfd04364686e11158950/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java + *
    + * Changes made: + *
  • Modify the data type input from Parquet-MR Binary (toFloat(Binary b)) to Arrow Java short (toFloat(short b))
  • + *
  • Expose NAN and POSITIVE_INFINITY variables
  • + *
+ * + * + * The class is a utility class to manipulate half-precision 16-bit + * IEEE 754 + * floating point data types (also called fp16 or binary16). A half-precision float can be + * created from or converted to single-precision floats, and is stored in a short data type. + * The IEEE 754 standard specifies an float16 as having the following format: + *
    + *
  • Sign bit: 1 bit
  • + *
  • Exponent width: 5 bits
  • + *
  • Significand: 10 bits
  • + *
+ * + *

The format is laid out as follows:

+ *
+ * 1   11111   1111111111
+ * ^   --^--   -----^----
+ * sign  |          |_______ significand
+ *       |
+ *      -- exponent
+ * 
+ * Half-precision floating points can be useful to save memory and/or + * bandwidth at the expense of range and precision when compared to single-precision + * floating points (float32). + * Ref: https://android.googlesource.com/platform/libcore/+/master/luni/src/main/java/libcore/util/FP16.java + */ +public class Float16 { + // Positive infinity of type half-precision float. + public static final short POSITIVE_INFINITY = (short) 0x7c00; + // A Not-a-Number representation of a half-precision float. + public static final short NaN = (short) 0x7e00; + // The bitmask to and a number with to obtain the sign bit. + private static final int SIGN_MASK = 0x8000; + // The offset to shift by to obtain the exponent bits. + private static final int EXPONENT_SHIFT = 10; + // The bitmask to and a number shifted by EXPONENT_SHIFT right, to obtain exponent bits. + private static final int SHIFTED_EXPONENT_MASK = 0x1f; + // The bitmask to and a number with to obtain significand bits. + private static final int SIGNIFICAND_MASK = 0x3ff; + // The offset of the exponent from the actual value. + private static final int EXPONENT_BIAS = 15; + // The offset to shift by to obtain the sign bit. + private static final int SIGN_SHIFT = 15; + // The bitmask to AND with to obtain exponent and significand bits. + private static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff; + + private static final int FP32_SIGN_SHIFT = 31; + private static final int FP32_EXPONENT_SHIFT = 23; + private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff; + private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; + private static final int FP32_EXPONENT_BIAS = 127; + private static final int FP32_QNAN_MASK = 0x400000; + private static final int FP32_DENORMAL_MAGIC = 126 << 23; + private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); + + /** + * Returns true if the specified half-precision float value represents + * a Not-a-Number, false otherwise. + * + * @param h A half-precision float value + * @return True if the value is a NaN, false otherwise + * + */ + @VisibleForTesting + public static boolean isNaN(short h) { + return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; + } + + /** + *

Compares the two specified half-precision float values. The following + * conditions apply during the comparison:

+ * + *
    + *
  • NaN is considered by this method to be equal to itself and greater + * than all other half-precision float values (including {@code #POSITIVE_INFINITY})
  • + *
  • POSITIVE_ZERO is considered by this method to be greater than NEGATIVE_ZERO.
  • + *
+ * + * @param x The first half-precision float value to compare. + * @param y The second half-precision float value to compare + * + * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a + * value less than {@code 0} if {@code x} is numerically less than {@code y}, + * and a value greater than {@code 0} if {@code x} is numerically greater + * than {@code y} + * + */ + @VisibleForTesting + public static int compare(short x, short y) { + boolean xIsNaN = isNaN(x); + boolean yIsNaN = isNaN(y); + + if (!xIsNaN && !yIsNaN) { + int first = ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff); + int second = ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); + // Returns true if the first half-precision float value is less + // (smaller toward negative infinity) than the second half-precision float value. + if (first < second) { + return -1; + } + + // Returns true if the first half-precision float value is greater + // (larger toward positive infinity) than the second half-precision float value. + if (first > second) { + return 1; + } + } + + // Collapse NaNs, akin to halfToIntBits(), but we want to keep + // (signed) short value types to preserve the ordering of -0.0 + // and +0.0 + short xBits = xIsNaN ? NaN : x; + short yBits = yIsNaN ? NaN : y; + return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); + } + + /** + * Converts the specified half-precision float value into a + * single-precision float value. The following special cases are handled: + * If the input is NaN, the returned value is Float NaN. + * If the input is POSITIVE_INFINITY or NEGATIVE_INFINITY, the returned value is respectively + * Float POSITIVE_INFINITY or Float NEGATIVE_INFINITY. + * If the input is 0 (positive or negative), the returned value is +/-0.0f. + * Otherwise, the returned value is a normalized single-precision float value. + * + * @param b The half-precision float value to convert to single-precision + * @return A normalized single-precision float value + */ + @VisibleForTesting + public static float toFloat(short b) { + int bits = b & 0xffff; + int s = bits & SIGN_MASK; + int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; + int m = (bits) & SIGNIFICAND_MASK; + int outE = 0; + int outM = 0; + if (e == 0) { // Denormal or 0 + if (m != 0) { + // Convert denorm fp16 into normalized fp32 + float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); + o -= FP32_DENORMAL_FLOAT; + return s == 0 ? o : -o; + } + } else { + outM = m << 13; + if (e == 0x1f) { // Infinite or NaN + outE = 0xff; + if (outM != 0) { // SNaNs are quieted + outM |= FP32_QNAN_MASK; + } + } else { + outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS; + } + } + int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; + return Float.intBitsToFloat(out); + } + + /** + * Converts the specified single-precision float value into a + * half-precision float value. The following special cases are handled: + * + * If the input is NaN, the returned value is NaN. + * If the input is Float POSITIVE_INFINITY or Float NEGATIVE_INFINITY, + * the returned value is respectively POSITIVE_INFINITY or NEGATIVE_INFINITY. + * If the input is 0 (positive or negative), the returned value is + * POSITIVE_ZERO or NEGATIVE_ZERO. + * If the input is a less than MIN_VALUE, the returned value + * is flushed to POSITIVE_ZERO or NEGATIVE_ZERO. + * If the input is a less than MIN_NORMAL, the returned value + * is a denorm half-precision float. 
+ * Otherwise, the returned value is rounded to the nearest + * representable half-precision float value. + * + * @param f The single-precision float value to convert to half-precision + * @return A half-precision float value + */ + public static short toFloat16(float f) { + int bits = Float.floatToRawIntBits(f); + int s = (bits >>> FP32_SIGN_SHIFT); + int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; + int m = (bits) & FP32_SIGNIFICAND_MASK; + int outE = 0; + int outM = 0; + if (e == 0xff) { // Infinite or NaN + outE = 0x1f; + outM = m != 0 ? 0x200 : 0; + } else { + e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS; + if (e >= 0x1f) { // Overflow + outE = 0x1f; + } else if (e <= 0) { // Underflow + if (e < -10) { + // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 + } else { + // The fp32 value is a normalized float less than MIN_NORMAL, + // we convert to a denorm fp16 + m = m | 0x800000; + int shift = 14 - e; + outM = m >> shift; + int lowm = m & ((1 << shift) - 1); + int hway = 1 << (shift - 1); + // if above halfway or exactly halfway and outM is odd + if (lowm + (outM & 1) > hway) { + // Round to nearest even + // Can overflow into exponent bit, which surprisingly is OK. + // This increment relies on the +outM in the return statement below + outM++; + } + } + } else { + outE = e; + outM = m >> 13; + // if above halfway or exactly halfway and outM is odd + if ((m & 0x1fff) + (outM & 0x1) > 0x1000) { + // Round to nearest even + // Can overflow into exponent bit, which surprisingly is OK. + // This increment relies on the +outM in the return statement below + outM++; + } + } + } + // The outM is added here as the +1 increments for outM above can + // cause an overflow in the exponent bit which is OK. + return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM); + } + + /** + * Returns a string representation of the specified half-precision + * float value. Calling this method is equivalent to calling + * Float.toString(toFloat(h)). See {@link Float#toString(float)} + * for more information on the format of the string representation. 
+ * + * @param h A half-precision float value in binary little-endian format + * @return A string representation of the specified value + */ + @VisibleForTesting + public static String toFloatString(short h) { + return Float.toString(Float16.toFloat(h)); + } +} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java index 9ba42abc1ce89..b4385b72a38cf 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java @@ -29,6 +29,7 @@ import java.nio.ByteOrder; import java.util.Arrays; +import org.apache.arrow.memory.util.Float16; import org.junit.Test; import org.slf4j.LoggerFactory; @@ -180,4 +181,14 @@ public void testEnabledHistoricalLog() { ((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(null); } } + + @Test + public void testArrowBufFloat16() { + try (BufferAllocator allocator = new RootAllocator(); + ArrowBuf buf = allocator.buffer(1024) + ) { + buf.setShort(0, Float16.toFloat16(+32.875f)); + assertEquals((short) 0x501c, buf.getShort(0)); + } + } } diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd index 2a921804202f0..6c2a967712454 100644 --- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd +++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd @@ -49,6 +49,16 @@ { class: "SmallInt", valueHolder: "Int2Holder"}, ] }, + { + major: "Fixed", + width: 2, + javaType: "short", + boxedType: "Short", + fields: [{name: "value", type: "short"}], + minor: [ + { class: "Float2", valueHolder: "Int2Holder"}, + ] + }, { major: "Fixed", width: 4, diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 56a6cc90b321b..822d4822987fb 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,7 +39,9 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private BaseReader[] readers = new BaseReader[45]; + private static final int NUM_SUPPORTED_TYPES = 46; + + private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; public UnionReader(UnionVector data) { @@ -50,7 +52,7 @@ public MinorType getMinorType() { return TYPES[data.getTypeValue(idx())]; } - private static MinorType[] TYPES = new MinorType[45]; + private static MinorType[] TYPES = new MinorType[NUM_SUPPORTED_TYPES]; static { for (MinorType minorType : MinorType.values()) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java new file mode 100644 index 0000000000000..9d3f25769abff --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java @@ -0,0 +1,434 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; + + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.Float16; +import org.apache.arrow.vector.complex.impl.Float2ReaderImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holders.Float2Holder; +import org.apache.arrow.vector.holders.NullableFloat2Holder; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; + +/** + * Float2Vector implements a fixed width (2 bytes) vector of + * short values which could be null. A validity buffer (bit vector) is + * maintained to track which elements in the vector are null. + */ +public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector { + public static final byte TYPE_WIDTH = 2; + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param name name of the vector + * @param allocator allocator for memory management. + */ + public Float2Vector(String name, BufferAllocator allocator) { + this(name, FieldType.nullable(MinorType.FLOAT2.getType()), allocator); + } + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param name name of the vector + * @param fieldType type of Field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float2Vector(String name, FieldType fieldType, BufferAllocator allocator) { + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float2Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); + } + + @Override + protected FieldReader getReaderImpl() { + return new Float2ReaderImpl(Float2Vector.this); + } + + /** + * Get minor type for this vector. The vector holds values belonging + * to a particular type. + * + * @return {@link MinorType} + */ + @Override + public MinorType getMinorType() { + return MinorType.FLOAT2; + } + + + /*----------------------------------------------------------------* + | | + | vector value retrieval methods | + | | + *----------------------------------------------------------------*/ + + + /** + * Get the element at the given index from the vector. 
+ * + * @param index position of element + * @return element at given index + */ + public short get(int index) throws IllegalStateException { + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + throw new IllegalStateException("Value at index is null"); + } + return valueBuffer.getShort((long) index * TYPE_WIDTH); + } + + /** + * Get the element at the given index from the vector and + * sets the state in holder. If element at given index + * is null, holder.isSet will be zero. + * + * @param index position of element + */ + public void get(int index, NullableFloat2Holder holder) { + if (isSet(index) == 0) { + holder.isSet = 0; + return; + } + holder.isSet = 1; + holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH); + } + + /** + * Same as {@link #get(int)}. + * + * @param index position of element + * @return element at given index + */ + @Override + public Short getObject(int index) { + if (isSet(index) == 0) { + return null; + } else { + return valueBuffer.getShort((long) index * TYPE_WIDTH); + } + } + + /** + * Given a data buffer, get the value stored at a particular position + * in the vector. + * + *
This method should not be used externally. + * + * @param buffer data buffer + * @param index position of the element. + * @return value stored at the index. + */ + static short get(final ArrowBuf buffer, final int index) { + return buffer.getShort((long) index * TYPE_WIDTH); + } + + @Override + public double getValueAsDouble(int index) { + return getValueAsFloat(index); + } + + public float getValueAsFloat(int index) { + return Float16.toFloat(this.get(index)); + } + + /*----------------------------------------------------------------* + | | + | vector value setter methods | + | | + *----------------------------------------------------------------*/ + + private void setValue(int index, short value) { + valueBuffer.setShort((long) index * TYPE_WIDTH, value); + } + + private void setValue(int index, float value) { + valueBuffer.setShort((long) index * TYPE_WIDTH, Float16.toFloat16(value)); + } + + /** + * Set the element at the given index to the given value. + * + * @param index position of element + * @param value value of element + */ + public void set(int index, short value) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, value); + } + + /** + * Set the element at the given index to the given value. + * + * @param index position of element + * @param value value of element + */ + public void setWithPossibleTruncate(int index, float value) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, value); + } + + /** + * Set the element at the given index to the value set in data holder. + * If the value in holder is not indicated as set, element in the + * at the given index will be null. + * + * @param index position of element + * @param holder nullable data holder for value of element + */ + public void set(int index, NullableFloat2Holder holder) throws IllegalArgumentException { + if (holder.isSet < 0) { + throw new IllegalArgumentException(); + } else if (holder.isSet > 0) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, holder.value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Set the element at the given index to the value set in data holder. + * + * @param index position of element + * @param holder data holder for value of element + */ + public void set(int index, Float2Holder holder) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, holder.value); + } + + /** + * Same as {@link #set(int, short)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param value value of element + */ + public void setSafe(int index, short value) { + handleSafe(index); + set(index, value); + } + + /** + * Same as {@link #setWithPossibleTruncate(int, float)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param value value of element + */ + public void setSafeWithPossibleTruncate(int index, float value) { + handleSafe(index); + setWithPossibleTruncate(index, value); + } + + /** + * Same as {@link #set(int, NullableFloat2Holder)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. 
+ * + * @param index position of element + * @param holder nullable data holder for value of element + */ + public void setSafe(int index, NullableFloat2Holder holder) throws IllegalArgumentException { + handleSafe(index); + set(index, holder); + } + + /** + * Same as {@link #set(int, Float2Holder)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param holder data holder for value of element + */ + public void setSafe(int index, Float2Holder holder) { + handleSafe(index); + set(index, holder); + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void set(int index, int isSet, short value) { + if (isSet > 0) { + set(index, value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setWithPossibleTruncate(int index, int isSet, float value) { + if (isSet > 0) { + setWithPossibleTruncate(index, value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Same as {@link #set(int, int, short)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setSafe(int index, int isSet, short value) { + handleSafe(index); + set(index, isSet, value); + } + + /** + * Same as {@link #set(int, int, short)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setSafeWithPossibleTruncate(int index, int isSet, float value) { + handleSafe(index); + setWithPossibleTruncate(index, isSet, value); + } + + @Override + public void setWithPossibleTruncate(int index, double value) { + throw new UnsupportedOperationException("The operation for double data types is not supported."); + } + + @Override + public void setSafeWithPossibleTruncate(int index, double value) { + throw new UnsupportedOperationException("The operation for double data types is not supported."); + } + + /*----------------------------------------------------------------* + | | + | vector transfer | + | | + *----------------------------------------------------------------*/ + + /** + * Construct a TransferPair comprising this and a target vector of + * the same type. + * + * @param ref name of the target vector + * @param allocator allocator for the target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return new TransferImpl(ref, allocator); + } + + /** + * Construct a TransferPair comprising this and a target vector of + * the same type. 
+ * + * @param field Field object used by the target vector + * @param allocator allocator for the target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + + /** + * Construct a TransferPair with a desired target vector of the same type. + * + * @param to target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((Float2Vector) to); + } + + private class TransferImpl implements TransferPair { + Float2Vector to; + + public TransferImpl(String ref, BufferAllocator allocator) { + to = new Float2Vector(ref, field.getFieldType(), allocator); + } + + public TransferImpl(Field field, BufferAllocator allocator) { + to = new Float2Vector(field, allocator); + } + + public TransferImpl(Float2Vector to) { + this.to = to; + } + + @Override + public Float2Vector getTo() { + return to; + } + + @Override + public void transfer() { + transferTo(to); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + splitAndTransferTo(startIndex, length, to); + } + + @Override + public void copyValueSafe(int fromIndex, int toIndex) { + to.copyFromSafe(fromIndex, toIndex, Float2Vector.this); + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index f29157524f2df..0b0e0d66a98f0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.types; import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; +import static org.apache.arrow.vector.types.FloatingPointPrecision.HALF; import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; import static org.apache.arrow.vector.types.UnionMode.Dense; import static org.apache.arrow.vector.types.UnionMode.Sparse; @@ -33,6 +34,7 @@ import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.Float2Vector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; @@ -79,6 +81,7 @@ import org.apache.arrow.vector.complex.impl.DenseUnionWriter; import org.apache.arrow.vector.complex.impl.DurationWriterImpl; import org.apache.arrow.vector.complex.impl.FixedSizeBinaryWriterImpl; +import org.apache.arrow.vector.complex.impl.Float2WriterImpl; import org.apache.arrow.vector.complex.impl.Float4WriterImpl; import org.apache.arrow.vector.complex.impl.Float8WriterImpl; import org.apache.arrow.vector.complex.impl.IntWriterImpl; @@ -432,6 +435,17 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new IntervalYearWriterImpl((IntervalYearVector) vector); } }, + FLOAT2(new FloatingPoint(HALF)) { + @Override + public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new Float2Vector(field, allocator); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new Float2WriterImpl((Float2Vector) vector); + } + }, // 4 byte ieee 754 FLOAT4(new FloatingPoint(SINGLE)) { @Override @@ -894,7 +908,7 @@ public MinorType visit(Int type) { public MinorType visit(FloatingPoint type) { switch 
(type.getPrecision()) { case HALF: - throw new UnsupportedOperationException("NYI: " + type); + return MinorType.FLOAT2; case SINGLE: return MinorType.FLOAT4; case DOUBLE: diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 614aff18d4554..10091aebdd50b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -332,6 +332,204 @@ public void testSizeOfValueBuffer() { } } + @Test + public void testFixedFloat2() { + try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { + boolean error = false; + int initialCapacity = 16; + + /* we should not throw exception for these values of capacity */ + floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT); + + try { + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); + } catch (OversizedAllocationException oe) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + floatVector.setInitialCapacity(initialCapacity); + /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ + assertEquals(0, floatVector.getValueCapacity()); + + /* allocate 32 bytes (16 * 2) */ + floatVector.allocateNew(); + /* underlying buffer should be able to store 16 values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); + + floatVector.zeroVector(); + + /* populate the floatVector */ + floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) + floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) + floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) + floatVector.set(6, (short) 0x901d); // Float16.toFloat16(-0.000502109527588f) + floatVector.set(8, (short) 0x121c); // Float16.toFloat16(+0.00074577331543f) + floatVector.set(10, (short) 0x921c); // Float16.toFloat16(-0.00074577331543f) + floatVector.set(12, (short) 0x501c); // Float16.toFloat16(+32.875f) + floatVector.set(14, (short) 0xd01c); // Float16.toFloat16(-32.875f) + + try { + floatVector.set(initialCapacity, (short) 0x141c); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + /* check vector contents */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals((short) 0x901d, floatVector.get(6)); + assertEquals((short) 0x121c, floatVector.get(8)); + assertEquals((short) 0x921c, floatVector.get(10)); + assertEquals((short) 0x501c, floatVector.get(12)); + assertEquals((short) 0xd01c, floatVector.get(14)); + + try { + floatVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + } + + /* this should trigger a realloc() */ + floatVector.setSafe(initialCapacity, (short) 0x141c); // Float16.toFloat16(+0.00100326538086f) + + /* underlying buffer should now be able to store double the number of values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); + + /* vector data should still be intact after realloc */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals((short) 
0x901d, floatVector.get(6)); + assertEquals((short) 0x121c, floatVector.get(8)); + assertEquals((short) 0x921c, floatVector.get(10)); + assertEquals((short) 0x501c, floatVector.get(12)); + assertEquals((short) 0xd01c, floatVector.get(14)); + assertEquals((short) 0x141c, floatVector.get(initialCapacity)); + + /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); + floatVector.reset(); + + /* capacity shouldn't change after reset */ + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); + + /* vector data should be zeroed out */ + for (int i = 0; i < capacityBeforeReset; i++) { + assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + } + } + } + + @Test + public void testFixedFloat2WithPossibleTruncate() { + try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { + boolean error = false; + int initialCapacity = 16; + + /* we should not throw exception for these values of capacity */ + floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT); + + try { + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); + } catch (OversizedAllocationException oe) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + floatVector.setInitialCapacity(initialCapacity); + /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ + assertEquals(0, floatVector.getValueCapacity()); + + /* allocate 32 bytes (16 * 2) */ + floatVector.allocateNew(); + /* underlying buffer should be able to store 16 values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); + + floatVector.zeroVector(); + + /* populate the floatVector */ + floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) + floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) + floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) + floatVector.setWithPossibleTruncate(6, 2049.0f); // in f32=2049.000000, out f16=2048 + floatVector.setWithPossibleTruncate(8, 4098.0f); // in f32=4098.000000, out f16=4096 + floatVector.setWithPossibleTruncate(10, 8196.0f); // in f32=8196.000000, out f16=8192 + floatVector.setWithPossibleTruncate(12, 16392.0f); // in f32=16392.000000, out f16=16384 + floatVector.setWithPossibleTruncate(14, 32784.0f); // in f32=32784.000000, out f16=32768 + + try { + floatVector.setWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641 + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + /* check vector contents */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); + assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); + assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); + assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0); + assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0); + + try { + floatVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + } + + /* this should trigger a realloc() */ + floatVector.setSafeWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641 + + /* underlying buffer should now be able to store double the 
number of values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); + + /* vector data should still be intact after realloc */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); + assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); + assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); + assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0); + assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0); + assertEquals(1.6181641f, floatVector.getValueAsDouble(initialCapacity), 0); + + /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); + floatVector.reset(); + + /* capacity shouldn't change after reset */ + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); + + /* vector data should be zeroed out */ + for (int i = 0; i < capacityBeforeReset; i++) { + assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + } + } + } + @Test /* Float4Vector */ public void testFixedType3() { try (final Float4Vector floatVector = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) { From 6ccfeeec3b864671556e50c1ac01e65f47bd06d9 Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 1 Feb 2024 21:14:47 +0800 Subject: [PATCH 12/74] GH-39876: [C++] Thirdparty: Bump zlib to 1.3.1 (#39877) ### Rationale for this change zlib 1.3.1 is the latest release. ### What changes are included in this PR? Bump zlib to 1.3.1 ### Are these changes tested? Already has testing ### Are there any user-facing changes? no * Closes: #39876 Authored-by: mwish Signed-off-by: Sutou Kouhei --- cpp/thirdparty/versions.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 2664775c0fbf4..dd3f5da84f777 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -115,8 +115,8 @@ ARROW_UTF8PROC_BUILD_VERSION=v2.7.0 ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=4bb121e297293c0fd55f08f83afab6d35d48f0af4ecc07523ad8ec99aa2b12a1 ARROW_XSIMD_BUILD_VERSION=9.0.1 ARROW_XSIMD_BUILD_SHA256_CHECKSUM=b1bb5f92167fd3a4f25749db0be7e61ed37e0a5d943490f3accdcd2cd2918cc0 -ARROW_ZLIB_BUILD_VERSION=1.3 -ARROW_ZLIB_BUILD_SHA256_CHECKSUM=ff0ba4c292013dbc27530b3a81e1f9a813cd39de01ca5e0f8bf355702efa593e +ARROW_ZLIB_BUILD_VERSION=1.3.1 +ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23 ARROW_ZSTD_BUILD_VERSION=1.5.5 ARROW_ZSTD_BUILD_SHA256_CHECKSUM=9c4396cc829cfae319a6e2615202e82aad41372073482fce286fac78646d3ee4 From 2721134715b7dedfa2715bcf47548728ff702d5a Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 1 Feb 2024 21:24:42 +0800 Subject: [PATCH 13/74] GH-39845: [C++][Parquet] Minor: avoid creating a new Reader object in Decoder::SetData (#39847) ### Rationale for this change avoid creating a new Reader object in Decoder::SetData ### What changes are included in this PR? avoid creating a new Reader object in Decoder::SetData ### Are these changes tested? Already ### Are there any user-facing changes? 
no * Closes: #39845 Authored-by: mwish Signed-off-by: mwish --- cpp/src/parquet/encoding.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index b801b5ab11bb9..5573f5b9aed4c 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -2411,7 +2411,11 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecodernum_values_ = num_values; - decoder_ = std::make_shared<::arrow::bit_util::BitReader>(data, len); + if (decoder_ == nullptr) { + decoder_ = std::make_shared<::arrow::bit_util::BitReader>(data, len); + } else { + decoder_->Reset(data, len); + } InitHeader(); } @@ -2769,7 +2773,11 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl, void SetData(int num_values, const uint8_t* data, int len) override { DecoderImpl::SetData(num_values, data, len); - decoder_ = std::make_shared<::arrow::bit_util::BitReader>(data, len); + if (decoder_ == nullptr) { + decoder_ = std::make_shared<::arrow::bit_util::BitReader>(data, len); + } else { + decoder_->Reset(data, len); + } DecodeLengths(); } From 44d5597a0e8a4d635f1aec82ba885f61b5c17829 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 1 Feb 2024 14:35:32 +0100 Subject: [PATCH 14/74] GH-39849: [Python] Remove the use of pytest-lazy-fixture (#39850) ### Rationale for this change Removing the use of `pytest-lazy-fixture` in our test suite as it is unmaintained. Changes in this PR include: - Remove the use of `pytest-lazy-fixture` - Remove marks from fixtures to avoid future error, see ``` PytestRemovedIn9Warning: Marks applied to fixtures have no effect See docs: https://docs.pytest.org/en/stable/deprecations.html#applying-a-mark-to-a-fixture-function ``` - Catch two different warnings in `def test_legacy_int_type()` ### Are these changes tested? The changes affect the tests so they must pass. ### Are there any user-facing changes? No. 
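As an illustration of the replacement pattern, here is a minimal, self-contained sketch (not the actual Arrow conftest; the fixture and test names below are invented for the example). Fixtures are parametrized with fixture *names* and resolved lazily through pytest's `request.getfixturevalue`, which covers what `pytest-lazy-fixture` previously did:

```python
import pytest


@pytest.fixture
def builtin_pickle():
    import pickle
    return pickle


@pytest.fixture
def cloudpickle_module():
    # Skips the parametrization cleanly if cloudpickle isn't installed.
    return pytest.importorskip("cloudpickle")


# Parametrize with fixture *names* and resolve them lazily inside the
# consuming fixture -- this is the pattern that replaces lazy_fixture().
@pytest.fixture(params=["builtin_pickle", "cloudpickle_module"])
def pickle_module(request):
    return request.getfixturevalue(request.param)


def test_pickle_roundtrip(pickle_module):
    data = {"a": 1}
    assert pickle_module.loads(pickle_module.dumps(data)) == data
```

Marks that previously sat on fixtures now belong on the `pytest.param(...)` entries instead (e.g. `marks=pytest.mark.s3`), which also avoids the `PytestRemovedIn9Warning` quoted above.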
* Closes: #39849 Lead-authored-by: AlenkaF Co-authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- ci/conda_env_python.txt | 3 +- dev/tasks/conda-recipes/arrow-cpp/meta.yaml | 1 - python/pyarrow/tests/conftest.py | 7 ++--- python/pyarrow/tests/test_dataset.py | 3 -- python/pyarrow/tests/test_extension_type.py | 5 +-- python/pyarrow/tests/test_fs.py | 34 ++++++++++----------- python/pyarrow/tests/test_ipc.py | 6 ++-- python/requirements-test.txt | 1 - python/requirements-wheel-test.txt | 1 - 9 files changed, 25 insertions(+), 36 deletions(-) diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 5fdd21d2bd1f9..59e2def1bf339 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -23,9 +23,8 @@ cloudpickle fsspec hypothesis numpy>=1.16.6 -pytest<8 # pytest-lazy-fixture broken on pytest 8.0.0 +pytest<8 pytest-faulthandler -pytest-lazy-fixture s3fs>=2023.10.0 setuptools setuptools_scm<8.0.0 diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml index b8ffbfdb715b6..367445c595c4b 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml +++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml @@ -340,7 +340,6 @@ outputs: # test_cpp_extension_in_python requires a compiler - {{ compiler("cxx") }} # [linux] - pytest - - pytest-lazy-fixture - backports.zoneinfo # [py<39] - boto3 - cffi diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index a5941e8c8d1a8..0da757a4bc56e 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -24,7 +24,6 @@ import urllib.request import pytest -from pytest_lazyfixture import lazy_fixture import hypothesis as h from ..conftest import groups, defaults @@ -259,13 +258,13 @@ def gcs_server(): @pytest.fixture( params=[ - lazy_fixture('builtin_pickle'), - lazy_fixture('cloudpickle') + 'builtin_pickle', + 'cloudpickle' ], scope='session' ) def pickle_module(request): - return request.param + return request.getfixturevalue(request.param) @pytest.fixture(scope='session') diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index a4838d63a6b0b..a9054f0b174aa 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -100,7 +100,6 @@ def assert_dataset_fragment_convenience_methods(dataset): @pytest.fixture -@pytest.mark.parquet def mockfs(): mockfs = fs._MockFileSystem() @@ -221,7 +220,6 @@ def multisourcefs(request): @pytest.fixture -@pytest.mark.parquet def dataset(mockfs): format = ds.ParquetFileFormat() selector = fs.FileSelector('subdir', recursive=True) @@ -2692,7 +2690,6 @@ def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module) @pytest.fixture -@pytest.mark.parquet def s3_example_simple(s3_server): from pyarrow.fs import FileSystem diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index a88e20eefe098..d8c792ef00c6b 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1485,10 +1485,7 @@ def test_legacy_int_type(): batch = pa.RecordBatch.from_arrays([ext_arr], names=['ext']) buf = ipc_write_batch(batch) - with pytest.warns( - RuntimeWarning, - match="pickle-based deserialization of pyarrow.PyExtensionType " - "subclasses is disabled by default"): + with pytest.warns((RuntimeWarning, FutureWarning)): batch = ipc_read_batch(buf) assert isinstance(batch.column(0).type, pa.UnknownExtensionType) diff --git 
a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index d0fa253e314e9..ab10addfc3d4c 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -362,79 +362,79 @@ def py_fsspec_s3fs(request, s3_server): @pytest.fixture(params=[ pytest.param( - pytest.lazy_fixture('localfs'), + 'localfs', id='LocalFileSystem()' ), pytest.param( - pytest.lazy_fixture('localfs_with_mmap'), + 'localfs_with_mmap', id='LocalFileSystem(use_mmap=True)' ), pytest.param( - pytest.lazy_fixture('subtree_localfs'), + 'subtree_localfs', id='SubTreeFileSystem(LocalFileSystem())' ), pytest.param( - pytest.lazy_fixture('s3fs'), + 's3fs', id='S3FileSystem', marks=pytest.mark.s3 ), pytest.param( - pytest.lazy_fixture('gcsfs'), + 'gcsfs', id='GcsFileSystem', marks=pytest.mark.gcs ), pytest.param( - pytest.lazy_fixture('hdfs'), + 'hdfs', id='HadoopFileSystem', marks=pytest.mark.hdfs ), pytest.param( - pytest.lazy_fixture('mockfs'), + 'mockfs', id='_MockFileSystem()' ), pytest.param( - pytest.lazy_fixture('py_localfs'), + 'py_localfs', id='PyFileSystem(ProxyHandler(LocalFileSystem()))' ), pytest.param( - pytest.lazy_fixture('py_mockfs'), + 'py_mockfs', id='PyFileSystem(ProxyHandler(_MockFileSystem()))' ), pytest.param( - pytest.lazy_fixture('py_fsspec_localfs'), + 'py_fsspec_localfs', id='PyFileSystem(FSSpecHandler(fsspec.LocalFileSystem()))' ), pytest.param( - pytest.lazy_fixture('py_fsspec_memoryfs'), + 'py_fsspec_memoryfs', id='PyFileSystem(FSSpecHandler(fsspec.filesystem("memory")))' ), pytest.param( - pytest.lazy_fixture('py_fsspec_s3fs'), + 'py_fsspec_s3fs', id='PyFileSystem(FSSpecHandler(s3fs.S3FileSystem()))', marks=pytest.mark.s3 ), ]) def filesystem_config(request): - return request.param + return request.getfixturevalue(request.param) @pytest.fixture -def fs(request, filesystem_config): +def fs(filesystem_config): return filesystem_config['fs'] @pytest.fixture -def pathfn(request, filesystem_config): +def pathfn(filesystem_config): return filesystem_config['pathfn'] @pytest.fixture -def allow_move_dir(request, filesystem_config): +def allow_move_dir(filesystem_config): return filesystem_config['allow_move_dir'] @pytest.fixture -def allow_append_to_file(request, filesystem_config): +def allow_append_to_file(filesystem_config): return filesystem_config['allow_append_to_file'] diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index f75ec8158a9da..407011d90b734 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -142,16 +142,16 @@ def stream_fixture(): @pytest.fixture(params=[ pytest.param( - pytest.lazy_fixture('file_fixture'), + 'file_fixture', id='File Format' ), pytest.param( - pytest.lazy_fixture('stream_fixture'), + 'stream_fixture', id='Stream Format' ) ]) def format_fixture(request): - return request.param + return request.getfixturevalue(request.param) def test_empty_file(): diff --git a/python/requirements-test.txt b/python/requirements-test.txt index b3ba5d852b968..2108d70a543f5 100644 --- a/python/requirements-test.txt +++ b/python/requirements-test.txt @@ -2,5 +2,4 @@ cffi hypothesis pandas pytest<8 -pytest-lazy-fixture pytz diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt index c74a8ca6908a7..a1046bc18c704 100644 --- a/python/requirements-wheel-test.txt +++ b/python/requirements-wheel-test.txt @@ -2,7 +2,6 @@ cffi cython hypothesis pytest<8 -pytest-lazy-fixture pytz tzdata; sys_platform == 'win32' From 3d45ac96534fc76b820b488aa02182e6b93a388f Mon Sep 17 00:00:00 
2001 From: "y.yoshida5" <39612448+yo1956@users.noreply.github.com> Date: Thu, 1 Feb 2024 22:36:59 +0900 Subject: [PATCH 15/74] GH-39779: [Python] Expose force_virtual_addressing in PyArrow (#39819) ### Rationale for this change / What changes are included in this PR? To expose force_virtual_addressing in PyArrow. ### Are these changes tested? Existing unit tests are not broken, and a new test case have been added. ### Are there any user-facing changes? pyarrow.fs.S3FileSystem: it becomes possible to specify the argument 'force_virtual_addressing'. * Closes: #39779 Authored-by: yo1956 Signed-off-by: Joris Van den Bossche --- python/pyarrow/_s3fs.pyx | 11 ++++++++++- python/pyarrow/includes/libarrow_fs.pxd | 1 + python/pyarrow/tests/test_fs.py | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx index 13b8c748cb8ca..f5bab99a49f7a 100644 --- a/python/pyarrow/_s3fs.pyx +++ b/python/pyarrow/_s3fs.pyx @@ -245,6 +245,11 @@ cdef class S3FileSystem(FileSystem): retry_strategy : S3RetryStrategy, default AwsStandardS3RetryStrategy(max_attempts=3) The retry strategy to use with S3; fail after max_attempts. Available strategies are AwsStandardS3RetryStrategy, AwsDefaultS3RetryStrategy. + force_virtual_addressing : bool, default False + Whether to use virtual addressing of buckets. + If true, then virtual addressing is always enabled. + If false, then virtual addressing is only enabled if `endpoint_override` is empty. + This can be used for non-AWS backends that only support virtual hosted-style access. Examples -------- @@ -268,7 +273,9 @@ cdef class S3FileSystem(FileSystem): role_arn=None, session_name=None, external_id=None, load_frequency=900, proxy_options=None, allow_bucket_creation=False, allow_bucket_deletion=False, - retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3)): + retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy( + max_attempts=3), + force_virtual_addressing=False): cdef: optional[CS3Options] options shared_ptr[CS3FileSystem] wrapped @@ -380,6 +387,7 @@ cdef class S3FileSystem(FileSystem): options.value().allow_bucket_creation = allow_bucket_creation options.value().allow_bucket_deletion = allow_bucket_deletion + options.value().force_virtual_addressing = force_virtual_addressing if isinstance(retry_strategy, AwsStandardS3RetryStrategy): options.value().retry_strategy = CS3RetryStrategy.GetAwsStandardRetryStrategy( @@ -447,6 +455,7 @@ cdef class S3FileSystem(FileSystem): opts.proxy_options.username), 'password': frombytes( opts.proxy_options.password)}, + force_virtual_addressing=opts.force_virtual_addressing, ),) ) diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd index cb30f4e750eff..7876fb0f96671 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -167,6 +167,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: c_bool background_writes c_bool allow_bucket_creation c_bool allow_bucket_deletion + c_bool force_virtual_addressing shared_ptr[const CKeyValueMetadata] default_metadata c_string role_arn c_string session_name diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index ab10addfc3d4c..6ba5137e4f63e 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -1186,6 +1186,10 @@ def test_s3_options(pickle_module): assert pickle_module.loads(pickle_module.dumps(fs2)) == fs2 assert fs2 != fs + fs = 
S3FileSystem(endpoint_override='localhost:8999', force_virtual_addressing=True) + assert isinstance(fs, S3FileSystem) + assert pickle_module.loads(pickle_module.dumps(fs)) == fs + with pytest.raises(ValueError): S3FileSystem(access_key='access') with pytest.raises(ValueError): From a1c1773b724e4d78faf9a097247c7e976cd2cbfa Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 1 Feb 2024 14:53:35 +0100 Subject: [PATCH 16/74] GH-39555: [Packaging][Python] Enable building pyarrow against numpy 2.0 (#39557) ### Rationale for this change Ensure we can build pyarrow against numpy 2.0 nightly (update pyproject.toml to allow this), and test this by building our nightly wheels with numpy nightly. This also ensures that other projects that use our nightly wheels to test together with numpy nightly can do that (numpy 2.0 changes the ABI, so to run with numpy 2.0, your package needs to be built with numpy 2.x; currently pyarrow installed with our nightly wheel will fail to import when also numpy nightly is installed). See the parent issue https://github.com/apache/arrow/issues/39532 for details, and https://numpy.org/devdocs/dev/depending_on_numpy.html#numpy-2-0-specific-advice for a direct link to the NumPy guidelines on updating build dependencies for NumPy 2.0. * Closes: #39555 Lead-authored-by: Joris Van den Bossche Co-authored-by: Antoine Pitrou Signed-off-by: Joris Van den Bossche --- ci/docker/python-wheel-manylinux.dockerfile | 5 +++-- ci/docker/python-wheel-windows-vs2017.dockerfile | 3 ++- ci/scripts/python_wheel_macos_build.sh | 5 ++++- python/pyproject.toml | 7 ++++++- python/requirements-build.txt | 3 ++- python/requirements-wheel-build.txt | 3 ++- python/setup.py | 2 +- 7 files changed, 20 insertions(+), 8 deletions(-) diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index 0a50d450c225a..a07c727ac76fa 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -28,7 +28,7 @@ ENV MANYLINUX_VERSION=${manylinux} RUN yum install -y dnf # Install basic dependencies -RUN dnf install -y git flex curl autoconf zip perl-IPC-Cmd wget kernel-headers +RUN dnf install -y git flex curl autoconf zip perl-IPC-Cmd wget # A system Python is required for ninja and vcpkg in this Dockerfile. 
# On manylinux2014 base images, system Python is 2.7.5, while @@ -97,4 +97,5 @@ SHELL ["/bin/bash", "-i", "-c"] ENTRYPOINT ["/bin/bash", "-i", "-c"] COPY python/requirements-wheel-build.txt /arrow/python/ -RUN pip install -r /arrow/python/requirements-wheel-build.txt +# TODO(GH-39848) Remove the `--pre --extra-index-url` for numpy nightly again before the 16.0 release +RUN pip install -r /arrow/python/requirements-wheel-build.txt --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" diff --git a/ci/docker/python-wheel-windows-vs2017.dockerfile b/ci/docker/python-wheel-windows-vs2017.dockerfile index faf07800c956a..067105b3a7995 100644 --- a/ci/docker/python-wheel-windows-vs2017.dockerfile +++ b/ci/docker/python-wheel-windows-vs2017.dockerfile @@ -88,7 +88,8 @@ RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION% RUN python -m pip install -U pip setuptools COPY python/requirements-wheel-build.txt arrow/python/ -RUN python -m pip install -r arrow/python/requirements-wheel-build.txt +# TODO(GH-39848) Remove the `--pre --extra-index-url` for numpy nightly again before the 16.0 release +RUN python -m pip install -r arrow/python/requirements-wheel-build.txt --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" # ENV CLCACHE_DIR="C:\clcache" # ENV CLCACHE_COMPRESS=1 diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index fd845c512dcdb..8123a9fdf1c48 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -50,12 +50,15 @@ echo "=== (${PYTHON_VERSION}) Install Python build dependencies ===" export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])') export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}" +# TODO(GH-39848) Remove the `--pre --extra-index-url` for numpy nightly again before the 16.0 release pip install \ --upgrade \ --only-binary=:all: \ --target $PIP_SITE_PACKAGES \ --platform $PIP_TARGET_PLATFORM \ - -r ${source_dir}/python/requirements-wheel-build.txt + -r ${source_dir}/python/requirements-wheel-build.txt \ + --pre \ + --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" pip install "delocate>=0.10.3" echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" diff --git a/python/pyproject.toml b/python/pyproject.toml index 437de105ab8e7..9079618ad1c7d 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -18,7 +18,12 @@ [build-system] requires = [ "cython >= 0.29.31", - "oldest-supported-numpy>=0.14", + # Starting with NumPy 1.25, NumPy is (by default) as far back compatible + # as oldest-support-numpy was (customizable with a NPY_TARGET_VERSION + # define). For older Python versions (where NumPy 1.25 is not yet avaiable) + # continue using oldest-support-numpy. 
+ "oldest-supported-numpy>=0.14; python_version<'3.9'", + "numpy>=1.25; python_version>='3.9'", "setuptools_scm < 8.0.0", "setuptools >= 40.1.0", "wheel" diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 56e9d479ee9ba..e1372e807f88d 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,4 +1,5 @@ cython>=0.29.31 -oldest-supported-numpy>=0.14 +oldest-supported-numpy>=0.14; python_version<'3.9' +numpy>=1.25; python_version>='3.9' setuptools_scm<8.0.0 setuptools>=38.6.0 diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt index f42ee4a018f3c..044f9de5f8214 100644 --- a/python/requirements-wheel-build.txt +++ b/python/requirements-wheel-build.txt @@ -1,5 +1,6 @@ cython>=0.29.31 -oldest-supported-numpy>=0.14 +oldest-supported-numpy>=0.14; python_version<'3.9' +numpy>=1.25; python_version>='3.9' setuptools_scm<8.0.0 setuptools>=58 wheel diff --git a/python/setup.py b/python/setup.py index d7a2da2077cdd..098d75a3186af 100755 --- a/python/setup.py +++ b/python/setup.py @@ -449,7 +449,7 @@ def has_ext_modules(foo): install_requires = ( - 'numpy >= 1.16.6, <2', + 'numpy >= 1.16.6', ) From 4ceb66101382d74c6ef73ff546fad10183ab58d8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 1 Feb 2024 14:54:14 +0100 Subject: [PATCH 17/74] GH-39880: [Python][CI] Pin moto<5 for dask integration tests (#39881) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See upstream pin being added (https://github.com/dask/dask/pull/10868 / https://github.com/dask/dask/issues/10869), we are seeing the same failures * Closes: #39880 Lead-authored-by: Joris Van den Bossche Co-authored-by: Raúl Cumplido Signed-off-by: Joris Van den Bossche --- ci/scripts/install_dask.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/scripts/install_dask.sh b/ci/scripts/install_dask.sh index 8d712a88a6ab1..478c1d5997906 100755 --- a/ci/scripts/install_dask.sh +++ b/ci/scripts/install_dask.sh @@ -35,4 +35,5 @@ else fi # additional dependencies needed for dask's s3 tests -pip install moto[server] flask requests +# Moto 5 results in timeouts in s3 tests: https://github.com/dask/dask/issues/10869 +pip install "moto[server]<5" flask requests From b684028dfbeeed85d132a1249449a85877d796b1 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 1 Feb 2024 08:16:56 -0600 Subject: [PATCH 18/74] GH-39859: [R] Remove macOS from the allow list (#39861) Originally this was going to also bundle all of our dependencies to send to CRAN, but their webforms don't allow source tars that large (I tried down to 80MB which removed a large number of our dependencies, and that was still rejected by the macbuilder). This means that on CRAN, if there is no internet, the macOS binary will be minimal. But it means that we build on CRAN using source always. 
We should definitely submit this to macbuilder after this merges to main and confirm we get source build by default (since we look to the repo for our allowlist) * Closes: #39859 Authored-by: Jonathan Keane Signed-off-by: Jonathan Keane --- r/tools/nixlibs-allowlist.txt | 1 - r/tools/nixlibs.R | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/r/tools/nixlibs-allowlist.txt b/r/tools/nixlibs-allowlist.txt index 9c368e6ed15a2..bd9f0c1b2c084 100644 --- a/r/tools/nixlibs-allowlist.txt +++ b/r/tools/nixlibs-allowlist.txt @@ -2,4 +2,3 @@ ubuntu centos redhat rhel -darwin diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 17c6ab0a8078b..0af41888b95b7 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -222,7 +222,7 @@ check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apa # Try a remote allowlist so that we can add/remove without a release suppressWarnings(readLines(allowed)), # Fallback to default: allowed only on Ubuntu and CentOS/RHEL - error = function(e) c("ubuntu", "centos", "redhat", "rhel", "darwin") + error = function(e) c("ubuntu", "centos", "redhat", "rhel") ) # allowlist should contain valid regular expressions (plain strings ok too) any(grepl(paste(allowlist, collapse = "|"), os)) From 63c7c4a327ff5b27a1ba6838253408e965c0a348 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 1 Feb 2024 23:43:21 +0900 Subject: [PATCH 19/74] GH-39874: [CI][C++][Windows] Use pre-installed OpenSSL (#39882) ### Rationale for this change It seems that we can't use OpenSSL via Chocolatey. ```text openssl v3.2.0 [Approved] openssl package files install completed. Performing other installation steps. Attempt to get headers for https://slproweb.com/download/Win64OpenSSL-3_2_0.exe failed. The remote file either doesn't exist, is unauthorized, or is forbidden for url 'https://slproweb.com/download/Win64OpenSSL-3_2_0.exe'. Exception calling "GetResponse" with "0" argument(s): "The remote server returned an error: (404) Not Found." Downloading openssl 64 bit from 'https://slproweb.com/download/Win64OpenSSL-3_2_0.exe' ERROR: The remote file either doesn't exist, is unauthorized, or is forbidden for url 'https://slproweb.com/download/Win64OpenSSL-3_2_0.exe'. Exception calling "GetResponse" with "0" argument(s): "The remote server returned an error: (404) Not Found." This package is likely not broken for licensed users - see https://docs.chocolatey.org/en-us/features/private-cdn. The install of openssl was NOT successful. Error while running 'C:\ProgramData\chocolatey\lib\openssl\tools\chocolateyinstall.ps1'. See log for details. ``` ### What changes are included in this PR? Use pre-installed OpenSSL on self-hosted GitHub runner instead. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* Closes: #39874 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/cpp.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index bd14f1b895bf6..9fbad06692bd2 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -284,10 +284,6 @@ jobs: /t REG_DWORD ` /d 1 ` /f - - name: Installed Packages - run: choco list - - name: Install Dependencies - run: choco install -y --no-progress openssl - name: Checkout Arrow uses: actions/checkout@v4 with: From c534749b3230f4ad640fe568d603c665b4bcee3d Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:21:12 -0500 Subject: [PATCH 20/74] GH-39885: [CI][MATLAB] Bump matlab-actions/setup-matlab and matlab-actions/run-tests from v1 to v2 (#39886) ### Rationale for this change Upgrading our CI workflows to use the latest versions of [matlab-actions/setup-matlab](https://github.com/matlab-actions/setup-matlab/) and [matlab-actions/run-tests](https://github.com/matlab-actions/run-tests/). ### What changes are included in this PR? 1. Bumped version of `matlab-actions/setup-matlab` from `v1` to `v2` 2. Bumped version of `matlab-actions/runtests-matlab` from `v1` to `v2` ### Are these changes tested? All MATLAB workflow checks passed. ### Are there any user-facing changes? No. * Closes: #39885 Authored-by: Sarah Gilmore Signed-off-by: Sutou Kouhei --- .github/workflows/matlab.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 512ff2bb929b3..eceeb551a0653 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -52,7 +52,7 @@ jobs: - name: Install ninja-build run: sudo apt-get install ninja-build - name: Install MATLAB - uses: matlab-actions/setup-matlab@v1 + uses: matlab-actions/setup-matlab@v2 with: release: R2023a - name: Install ccache @@ -85,7 +85,7 @@ jobs: # Add the installation directory to the MATLAB Search Path by # setting the MATLABPATH environment variable. MATLABPATH: matlab/install/arrow_matlab - uses: matlab-actions/run-tests@v1 + uses: matlab-actions/run-tests@v2 with: select-by-folder: matlab/test macos: @@ -100,7 +100,7 @@ jobs: - name: Install ninja-build run: brew install ninja - name: Install MATLAB - uses: matlab-actions/setup-matlab@v1 + uses: matlab-actions/setup-matlab@v2 with: release: R2023a - name: Install ccache @@ -125,7 +125,7 @@ jobs: # Add the installation directory to the MATLAB Search Path by # setting the MATLABPATH environment variable. MATLABPATH: matlab/install/arrow_matlab - uses: matlab-actions/run-tests@v1 + uses: matlab-actions/run-tests@v2 with: select-by-folder: matlab/test windows: @@ -138,7 +138,7 @@ jobs: with: fetch-depth: 0 - name: Install MATLAB - uses: matlab-actions/setup-matlab@v1 + uses: matlab-actions/setup-matlab@v2 with: release: R2023a - name: Download Timezone Database @@ -171,6 +171,6 @@ jobs: # Add the installation directory to the MATLAB Search Path by # setting the MATLABPATH environment variable. 
MATLABPATH: matlab/install/arrow_matlab - uses: matlab-actions/run-tests@v1 + uses: matlab-actions/run-tests@v2 with: select-by-folder: matlab/test From 87b515e9207509aa3f77e3e1c0122be314a77e6d Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 1 Feb 2024 11:48:29 -0500 Subject: [PATCH 21/74] GH-39771: [C++][Device] Generic CopyBatchTo/CopyArrayTo memory types (#39772) ### Rationale for this change Right now our MemoryManager interfaces operate solely at the buffer level and we do not provide any higher level facilities to copy an entire array or record batch between memory types. We should implement CopyArrayTo and CopyBatchTo functions which recursively utilize the buffer level copying to create a new Array/RecordBatch whose buffers have been copied to the destination memory manager. ### What changes are included in this PR? Exposing a `CopyArrayTo` and `CopyBatchTo` function for copying entire Array or RecordBatches between memory types. ### Are these changes tested? Tests are still being written but will be added. * Closes: #39771 Authored-by: Matt Topol Signed-off-by: Matt Topol --- cpp/src/arrow/array/array_base.cc | 12 +++++++++ cpp/src/arrow/array/array_base.h | 16 ++++++++++++ cpp/src/arrow/array/data.cc | 39 ++++++++++++++++++++++++++++ cpp/src/arrow/array/data.h | 19 +++++++++++--- cpp/src/arrow/buffer.h | 2 +- cpp/src/arrow/c/bridge.cc | 2 +- cpp/src/arrow/c/bridge_test.cc | 4 ++- cpp/src/arrow/device.cc | 2 ++ cpp/src/arrow/gpu/cuda_context.cc | 5 ++++ cpp/src/arrow/ipc/read_write_test.cc | 27 +++---------------- cpp/src/arrow/record_batch.cc | 24 +++++++++++++++++ cpp/src/arrow/record_batch.h | 19 ++++++++++++++ 12 files changed, 142 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index b483ec420cc3c..6927f51283eb7 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -307,6 +307,18 @@ Result> Array::View( return MakeArray(result); } +Result> Array::CopyTo( + const std::shared_ptr& to) const { + ARROW_ASSIGN_OR_RAISE(auto copied_data, data()->CopyTo(to)); + return MakeArray(copied_data); +} + +Result> Array::ViewOrCopyTo( + const std::shared_ptr& to) const { + ARROW_ASSIGN_OR_RAISE(auto new_data, data()->ViewOrCopyTo(to)); + return MakeArray(new_data); +} + // ---------------------------------------------------------------------- // NullArray diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 7e857bf20568e..6411aebf80442 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -165,6 +165,22 @@ class ARROW_EXPORT Array { /// An error is returned if the types are not layout-compatible. Result> View(const std::shared_ptr& type) const; + /// \brief Construct a copy of the array with all buffers on destination + /// Memory Manager + /// + /// This method recursively copies the array's buffers and those of its children + /// onto the destination MemoryManager device and returns the new Array. + Result> CopyTo(const std::shared_ptr& to) const; + + /// \brief Construct a new array attempting to zero-copy view if possible. + /// + /// Like CopyTo this method recursively goes through all of the array's buffers + /// and those of it's children and first attempts to create zero-copy + /// views on the destination MemoryManager device. If it can't, it falls back + /// to performing a copy. See Buffer::ViewOrCopy. 
+ Result> ViewOrCopyTo( + const std::shared_ptr& to) const; + /// Construct a zero-copy slice of the array with the indicated offset and /// length /// diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 8454ac8f1d5fb..80c411dfa6a6d 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -27,6 +27,7 @@ #include "arrow/array/util.h" #include "arrow/buffer.h" +#include "arrow/device.h" #include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/type.h" @@ -36,6 +37,7 @@ #include "arrow/util/dict_util.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/range.h" #include "arrow/util/ree_util.h" #include "arrow/util/slice_util_internal.h" #include "arrow/util/union_util.h" @@ -140,6 +142,43 @@ std::shared_ptr ArrayData::Make(std::shared_ptr type, int64 return std::make_shared(std::move(type), length, null_count, offset); } +namespace { +template +Result> CopyToImpl(const ArrayData& data, + const std::shared_ptr& to, + Fn&& copy_fn) { + auto output = ArrayData::Make(data.type, data.length, data.null_count, data.offset); + output->buffers.resize(data.buffers.size()); + for (auto&& [buf, out_buf] : internal::Zip(data.buffers, output->buffers)) { + if (buf) { + ARROW_ASSIGN_OR_RAISE(out_buf, copy_fn(buf, to)); + } + } + + output->child_data.reserve(data.child_data.size()); + for (const auto& child : data.child_data) { + ARROW_ASSIGN_OR_RAISE(auto copied, CopyToImpl(*child, to, copy_fn)); + output->child_data.push_back(std::move(copied)); + } + + if (data.dictionary) { + ARROW_ASSIGN_OR_RAISE(output->dictionary, CopyToImpl(*data.dictionary, to, copy_fn)); + } + + return output; +} +} // namespace + +Result> ArrayData::CopyTo( + const std::shared_ptr& to) const { + return CopyToImpl(*this, to, MemoryManager::CopyBuffer); +} + +Result> ArrayData::ViewOrCopyTo( + const std::shared_ptr& to) const { + return CopyToImpl(*this, to, Buffer::ViewOrCopy); +} + std::shared_ptr ArrayData::Slice(int64_t off, int64_t len) const { ARROW_CHECK_LE(off, length) << "Slice offset (" << off << ") greater than array length (" << length << ")"; diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index edd443adc43c4..d8a6663cec580 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -27,6 +27,7 @@ #include "arrow/buffer.h" #include "arrow/result.h" #include "arrow/type.h" +#include "arrow/type_fwd.h" #include "arrow/util/bit_util.h" #include "arrow/util/macros.h" #include "arrow/util/span.h" @@ -34,9 +35,6 @@ namespace arrow { -class Array; -struct ArrayData; - namespace internal { // ---------------------------------------------------------------------- // Null handling for types without a validity bitmap and the dictionary type @@ -183,6 +181,21 @@ struct ARROW_EXPORT ArrayData { std::shared_ptr Copy() const { return std::make_shared(*this); } + /// \brief Copy all buffers and children recursively to destination MemoryManager + /// + /// This utilizes MemoryManager::CopyBuffer to create a new ArrayData object + /// recursively copying the buffers and all child buffers to the destination + /// memory manager. This includes dictionaries if applicable. + Result> CopyTo( + const std::shared_ptr& to) const; + /// \brief View or Copy this ArrayData to destination memory manager. + /// + /// Tries to view the buffer contents on the given memory manager's device + /// if possible (to avoid a copy) but falls back to copying if a no-copy view + /// isn't supported. 
+ Result> ViewOrCopyTo( + const std::shared_ptr& to) const; + bool IsNull(int64_t i) const { return !IsValid(i); } bool IsValid(int64_t i) const { diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 52fd94ec1f7d4..258a9faac7361 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -359,7 +359,7 @@ class ARROW_EXPORT Buffer { static Result> ViewOrCopy( std::shared_ptr source, const std::shared_ptr& to); - virtual std::shared_ptr device_sync_event() { return NULLPTR; } + virtual std::shared_ptr device_sync_event() const { return NULLPTR; } protected: bool is_mutable_; diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 238afb0328672..172ed8962ce77 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1466,7 +1466,7 @@ class ImportedBuffer : public Buffer { ~ImportedBuffer() override = default; - std::shared_ptr device_sync_event() override { + std::shared_ptr device_sync_event() const override { return import_->device_sync_; } diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 58bbc9282c204..321ec36c38d8c 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -1282,7 +1282,9 @@ class MyBuffer final : public MutableBuffer { default_memory_pool()->Free(const_cast(data_), size_); } - std::shared_ptr device_sync_event() override { return device_sync_; } + std::shared_ptr device_sync_event() const override { + return device_sync_; + } protected: std::shared_ptr device_sync_; diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index de709923dc44e..616f89aae896f 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -20,8 +20,10 @@ #include #include +#include "arrow/array.h" #include "arrow/buffer.h" #include "arrow/io/memory.h" +#include "arrow/record_batch.h" #include "arrow/result.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/gpu/cuda_context.cc b/cpp/src/arrow/gpu/cuda_context.cc index 81542d339bd70..988cc1f25b91c 100644 --- a/cpp/src/arrow/gpu/cuda_context.cc +++ b/cpp/src/arrow/gpu/cuda_context.cc @@ -433,6 +433,11 @@ Result> CudaMemoryManager::CopyBufferTo( Result> CudaMemoryManager::CopyNonOwnedTo( const Buffer& buf, const std::shared_ptr& to) { if (to->is_cpu()) { + auto sync_event = buf.device_sync_event(); + if (sync_event) { + RETURN_NOT_OK(sync_event->Wait()); + } + // Device-to-CPU copy std::unique_ptr dest; ARROW_ASSIGN_OR_RAISE(auto from_context, cuda_device()->GetContext()); diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index bd2c2b716d502..c5075299a3e35 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1336,30 +1336,11 @@ class CopyCollectListener : public CollectListener { Status OnRecordBatchWithMetadataDecoded( RecordBatchWithMetadata record_batch_with_metadata) override { - auto& record_batch = record_batch_with_metadata.batch; - for (auto column_data : record_batch->column_data()) { - ARROW_RETURN_NOT_OK(CopyArrayData(column_data)); - } - return CollectListener::OnRecordBatchWithMetadataDecoded(record_batch_with_metadata); - } + ARROW_ASSIGN_OR_RAISE( + record_batch_with_metadata.batch, + record_batch_with_metadata.batch->CopyTo(default_cpu_memory_manager())); - private: - Status CopyArrayData(std::shared_ptr data) { - auto& buffers = data->buffers; - for (size_t i = 0; i < buffers.size(); ++i) { - auto& buffer = buffers[i]; - if (!buffer) { - continue; - } - ARROW_ASSIGN_OR_RAISE(buffers[i], Buffer::Copy(buffer, 
buffer->memory_manager())); - } - for (auto child_data : data->child_data) { - ARROW_RETURN_NOT_OK(CopyArrayData(child_data)); - } - if (data->dictionary) { - ARROW_RETURN_NOT_OK(CopyArrayData(data->dictionary)); - } - return Status::OK(); + return CollectListener::OnRecordBatchWithMetadataDecoded(record_batch_with_metadata); } }; diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index 457135fa400d5..ca6b45af3d6b4 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -357,6 +357,30 @@ Status ValidateBatch(const RecordBatch& batch, bool full_validation) { } // namespace +Result> RecordBatch::CopyTo( + const std::shared_ptr& to) const { + ArrayVector copied_columns; + copied_columns.reserve(num_columns()); + for (const auto& col : columns()) { + ARROW_ASSIGN_OR_RAISE(auto c, col->CopyTo(to)); + copied_columns.push_back(std::move(c)); + } + + return Make(schema_, num_rows(), std::move(copied_columns)); +} + +Result> RecordBatch::ViewOrCopyTo( + const std::shared_ptr& to) const { + ArrayVector copied_columns; + copied_columns.reserve(num_columns()); + for (const auto& col : columns()) { + ARROW_ASSIGN_OR_RAISE(auto c, col->ViewOrCopyTo(to)); + copied_columns.push_back(std::move(c)); + } + + return Make(schema_, num_rows(), std::move(copied_columns)); +} + Status RecordBatch::Validate() const { return ValidateBatch(*this, /*full_validation=*/false); } diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h index 1a66fc3fb5629..79f93a7b5997f 100644 --- a/cpp/src/arrow/record_batch.h +++ b/cpp/src/arrow/record_batch.h @@ -186,6 +186,25 @@ class ARROW_EXPORT RecordBatch { /// \return the number of rows (the corresponding length of each column) int64_t num_rows() const { return num_rows_; } + /// \brief Copy the entire RecordBatch to destination MemoryManager + /// + /// This uses Array::CopyTo on each column of the record batch to create + /// a new record batch where all underlying buffers for the columns have + /// been copied to the destination MemoryManager. This uses + /// MemoryManager::CopyBuffer under the hood. + Result> CopyTo( + const std::shared_ptr& to) const; + + /// \brief View or Copy the entire RecordBatch to destination MemoryManager + /// + /// This uses Array::ViewOrCopyTo on each column of the record batch to create + /// a new record batch where all underlying buffers for the columns have + /// been zero-copy viewed on the destination MemoryManager, falling back + /// to performing a copy if it can't be viewed as a zero-copy buffer. This uses + /// Buffer::ViewOrCopy under the hood. + Result> ViewOrCopyTo( + const std::shared_ptr& to) const; + /// \brief Slice each of the arrays in the record batch /// \param[in] offset the starting offset to slice, through end of batch /// \return new record batch From f9b7ac2e922bceed8bab09b1e28d7261cbe8b41d Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Thu, 1 Feb 2024 23:08:21 +0530 Subject: [PATCH 22/74] GH-37841: [Java] Dictionary decoding not using the compression factory from the ArrowReader (#38371) ### Rationale for this change This PR addresses https://github.com/apache/arrow/issues/37841. ### What changes are included in this PR? Adding compression-based write and read for Dictionary data. ### Are these changes tested? Yes. ### Are there any user-facing changes? 
No * Closes: #37841 Lead-authored-by: Vibhatha Lakmal Abeykoon Co-authored-by: vibhatha Signed-off-by: David Li --- .../TestArrowReaderWriterWithCompression.java | 206 ++++++++++++++++-- .../apache/arrow/vector/ipc/ArrowReader.java | 2 +- .../apache/arrow/vector/ipc/ArrowWriter.java | 23 +- 3 files changed, 201 insertions(+), 30 deletions(-) diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java index 6104cb1a132e4..af28333746290 100644 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java +++ b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java @@ -18,7 +18,9 @@ package org.apache.arrow.compression; import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.nio.channels.Channels; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -27,63 +29,223 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.GenerateSampleData; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.compression.CompressionUtil; import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.ipc.ArrowFileReader; import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; import org.apache.arrow.vector.ipc.message.IpcOption; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; +import org.junit.After; import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestArrowReaderWriterWithCompression { - @Test - public void testArrowFileZstdRoundTrip() throws Exception { - // Prepare sample data - final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + private BufferAllocator allocator; + private ByteArrayOutputStream out; + private VectorSchemaRoot root; + + @BeforeEach + public void setup() { + if (allocator == null) { + allocator = new RootAllocator(Integer.MAX_VALUE); + } + out = new ByteArrayOutputStream(); + root = null; + } + + @After + public void tearDown() { + if (root != null) { + root.close(); + } + if (allocator != null) { + allocator.close(); + } + if (out != null) { + out.reset(); + } + + } + + private void createAndWriteArrowFile(DictionaryProvider provider, + CompressionUtil.CodecType codecType) throws IOException { List fields = new ArrayList<>(); fields.add(new Field("col", FieldType.notNullable(new ArrowType.Utf8()), new ArrayList<>())); - VectorSchemaRoot root = VectorSchemaRoot.create(new Schema(fields), allocator); + root = VectorSchemaRoot.create(new Schema(fields), allocator); + final int rowCount = 10; 
GenerateSampleData.generateTestData(root.getVector(0), rowCount); root.setRowCount(rowCount); - // Write an in-memory compressed arrow file - ByteArrayOutputStream out = new ByteArrayOutputStream(); - try (final ArrowFileWriter writer = - new ArrowFileWriter(root, null, Channels.newChannel(out), new HashMap<>(), - IpcOption.DEFAULT, CommonsCompressionFactory.INSTANCE, CompressionUtil.CodecType.ZSTD, Optional.of(7))) { + try (final ArrowFileWriter writer = new ArrowFileWriter(root, provider, Channels.newChannel(out), + new HashMap<>(), IpcOption.DEFAULT, CommonsCompressionFactory.INSTANCE, codecType, Optional.of(7))) { writer.start(); writer.writeBatch(); writer.end(); } + } + + private void createAndWriteArrowStream(DictionaryProvider provider, + CompressionUtil.CodecType codecType) throws IOException { + List fields = new ArrayList<>(); + fields.add(new Field("col", FieldType.notNullable(new ArrowType.Utf8()), new ArrayList<>())); + root = VectorSchemaRoot.create(new Schema(fields), allocator); + + final int rowCount = 10; + GenerateSampleData.generateTestData(root.getVector(0), rowCount); + root.setRowCount(rowCount); + + try (final ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, Channels.newChannel(out), + IpcOption.DEFAULT, CommonsCompressionFactory.INSTANCE, codecType, Optional.of(7))) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + } - // Read the in-memory compressed arrow file with CommonsCompressionFactory provided + private Dictionary createDictionary(VarCharVector dictionaryVector) { + setVector(dictionaryVector, + "foo".getBytes(StandardCharsets.UTF_8), + "bar".getBytes(StandardCharsets.UTF_8), + "baz".getBytes(StandardCharsets.UTF_8)); + + return new Dictionary(dictionaryVector, + new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, /*indexType=*/null)); + } + + @Test + public void testArrowFileZstdRoundTrip() throws Exception { + createAndWriteArrowFile(null, CompressionUtil.CodecType.ZSTD); + // with compression + try (ArrowFileReader reader = + new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, + CommonsCompressionFactory.INSTANCE)) { + Assertions.assertEquals(1, reader.getRecordBlocks().size()); + Assertions.assertTrue(reader.loadNextBatch()); + Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); + Assertions.assertFalse(reader.loadNextBatch()); + } + // without compression try (ArrowFileReader reader = - new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, CommonsCompressionFactory.INSTANCE)) { - Assert.assertEquals(1, reader.getRecordBlocks().size()); + new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, + NoCompressionCodec.Factory.INSTANCE)) { + Assertions.assertEquals(1, reader.getRecordBlocks().size()); + Exception exception = Assert.assertThrows(IllegalArgumentException.class, + reader::loadNextBatch); + Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + exception.getMessage()); + } + } + + @Test + public void testArrowStreamZstdRoundTrip() throws Exception { + createAndWriteArrowStream(null, CompressionUtil.CodecType.ZSTD); + // with compression + try (ArrowStreamReader reader = + new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, + CommonsCompressionFactory.INSTANCE)) { Assert.assertTrue(reader.loadNextBatch()); Assert.assertTrue(root.equals(reader.getVectorSchemaRoot())); 
Assert.assertFalse(reader.loadNextBatch()); } + // without compression + try (ArrowStreamReader reader = + new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, + NoCompressionCodec.Factory.INSTANCE)) { + Exception exception = Assert.assertThrows(IllegalArgumentException.class, + reader::loadNextBatch); + Assert.assertEquals( + "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + exception.getMessage() + ); + } + } - // Read the in-memory compressed arrow file without CompressionFactory provided + @Test + public void testArrowFileZstdRoundTripWithDictionary() throws Exception { + VarCharVector dictionaryVector = (VarCharVector) + FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector("f1_file", allocator, null); + Dictionary dictionary = createDictionary(dictionaryVector); + DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider(); + provider.put(dictionary); + + createAndWriteArrowFile(provider, CompressionUtil.CodecType.ZSTD); + + // with compression + try (ArrowFileReader reader = + new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, + CommonsCompressionFactory.INSTANCE)) { + Assertions.assertEquals(1, reader.getRecordBlocks().size()); + Assertions.assertTrue(reader.loadNextBatch()); + Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); + Assertions.assertFalse(reader.loadNextBatch()); + } + // without compression try (ArrowFileReader reader = - new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, NoCompressionCodec.Factory.INSTANCE)) { - Assert.assertEquals(1, reader.getRecordBlocks().size()); + new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, + NoCompressionCodec.Factory.INSTANCE)) { + Assertions.assertEquals(1, reader.getRecordBlocks().size()); + Exception exception = Assert.assertThrows(IllegalArgumentException.class, + reader::loadNextBatch); + Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + exception.getMessage()); + } + dictionaryVector.close(); + } + + @Test + public void testArrowStreamZstdRoundTripWithDictionary() throws Exception { + VarCharVector dictionaryVector = (VarCharVector) + FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector("f1_stream", allocator, null); + Dictionary dictionary = createDictionary(dictionaryVector); + DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider(); + provider.put(dictionary); + + createAndWriteArrowStream(provider, CompressionUtil.CodecType.ZSTD); + + // with compression + try (ArrowStreamReader reader = + new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, + CommonsCompressionFactory.INSTANCE)) { + Assertions.assertTrue(reader.loadNextBatch()); + Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); + Assertions.assertFalse(reader.loadNextBatch()); + } + // without compression + try (ArrowStreamReader reader = + new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, + NoCompressionCodec.Factory.INSTANCE)) { + Exception exception = Assert.assertThrows(IllegalArgumentException.class, + reader::loadNextBatch); + Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + exception.getMessage()); + } + dictionaryVector.close(); + } - 
Exception exception = Assert.assertThrows(IllegalArgumentException.class, () -> reader.loadNextBatch()); - String expectedMessage = "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD"; - Assert.assertEquals(expectedMessage, exception.getMessage()); + public static void setVector(VarCharVector vector, byte[]... values) { + final int length = values.length; + vector.allocateNewSafe(); + for (int i = 0; i < length; i++) { + if (values[i] != null) { + vector.set(i, values[i]); + } } + vector.setValueCount(length); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java index 04c57d7e82fef..01f4e925c69b3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java @@ -251,7 +251,7 @@ private void load(ArrowDictionaryBatch dictionaryBatch, FieldVector vector) { VectorSchemaRoot root = new VectorSchemaRoot( Collections.singletonList(vector.getField()), Collections.singletonList(vector), 0); - VectorLoader loader = new VectorLoader(root); + VectorLoader loader = new VectorLoader(root, this.compressionFactory); try { loader.load(dictionaryBatch.getDictionary()); } finally { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java index a33c55de53f23..1cc201ae56f4b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java @@ -61,9 +61,14 @@ public abstract class ArrowWriter implements AutoCloseable { private final DictionaryProvider dictionaryProvider; private final Set dictionaryIdsUsed = new HashSet<>(); + private final CompressionCodec.Factory compressionFactory; + private final CompressionUtil.CodecType codecType; + private final Optional compressionLevel; private boolean started = false; private boolean ended = false; + private final CompressionCodec codec; + protected IpcOption option; protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) { @@ -89,16 +94,19 @@ protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, Writab protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out, IpcOption option, CompressionCodec.Factory compressionFactory, CompressionUtil.CodecType codecType, Optional compressionLevel) { - this.unloader = new VectorUnloader( - root, /*includeNullCount*/ true, - compressionLevel.isPresent() ? - compressionFactory.createCodec(codecType, compressionLevel.get()) : - compressionFactory.createCodec(codecType), - /*alignBuffers*/ true); this.out = new WriteChannel(out); this.option = option; this.dictionaryProvider = provider; + this.compressionFactory = compressionFactory; + this.codecType = codecType; + this.compressionLevel = compressionLevel; + this.codec = this.compressionLevel.isPresent() ? 
+ this.compressionFactory.createCodec(this.codecType, this.compressionLevel.get()) : + this.compressionFactory.createCodec(this.codecType); + this.unloader = new VectorUnloader(root, /*includeNullCount*/ true, codec, + /*alignBuffers*/ true); + List fields = new ArrayList<>(root.getSchema().getFields().size()); MetadataV4UnionChecker.checkForUnion(root.getSchema().getFields().iterator(), option.metadataVersion); @@ -133,7 +141,8 @@ protected void writeDictionaryBatch(Dictionary dictionary) throws IOException { Collections.singletonList(vector.getField()), Collections.singletonList(vector), count); - VectorUnloader unloader = new VectorUnloader(dictRoot); + VectorUnloader unloader = new VectorUnloader(dictRoot, /*includeNullCount*/ true, this.codec, + /*alignBuffers*/ true); ArrowRecordBatch batch = unloader.getRecordBatch(); ArrowDictionaryBatch dictionaryBatch = new ArrowDictionaryBatch(id, batch, false); try { From a57363867a6d88d0a7f17767571ab57dbb70cbfd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 13:33:45 -0500 Subject: [PATCH 23/74] MINOR: [JS] Bump esbuild from 0.19.2 to 0.20.0 in /js (#39891) Bumps [esbuild](https://github.com/evanw/esbuild) from 0.19.2 to 0.20.0.

Release notes

Sourced from esbuild's releases.

v0.20.0

This release deliberately contains backwards-incompatible changes. To avoid automatically picking up releases like this, you should either be pinning the exact version of esbuild in your package.json file (recommended) or be using a version range syntax that only accepts patch upgrades such as ^0.19.0 or ~0.19.0. See npm's documentation about semver for more information.

This time there is only one breaking change, and it only matters for people using Deno. Deno tests that use esbuild will now fail unless you make the change described below.

  • Work around API deprecations in Deno 1.40.x (#3609, #3611)

    Deno 1.40.0 was just released and introduced run-time warnings about certain APIs that esbuild uses. With this release, esbuild will work around these run-time warnings by using newer APIs if they are present and falling back to the original APIs otherwise. This should avoid the warnings without breaking compatibility with older versions of Deno.

    Unfortunately, doing this introduces a breaking change. The newer child process APIs lack a way to synchronously terminate esbuild's child process, so calling esbuild.stop() from within a Deno test is no longer sufficient to prevent Deno from failing a test that uses esbuild's API (Deno fails tests that create a child process without killing it before the test ends). To work around this, esbuild's stop() function has been changed to return a promise, and you now have to change esbuild.stop() to await esbuild.stop() in all of your Deno tests.

  • Reorder implicit file extensions within node_modules (#3341, #3608)

    In version 0.18.0, esbuild changed the behavior of implicit file extensions within node_modules directories (i.e. in published packages) to prefer .js over .ts even when the --resolve-extensions= order prefers .ts over .js (which it does by default). However, doing that also accidentally made esbuild prefer .css over .ts, which caused problems for people that published packages containing both TypeScript and CSS in files with the same name.

    With this release, esbuild will reorder TypeScript file extensions immediately after the last JavaScript file extensions in the implicit file extension order instead of putting them at the end of the order. Specifically the default implicit file extension order is .tsx,.ts,.jsx,.js,.css,.json which used to become .jsx,.js,.css,.json,.tsx,.ts in node_modules directories. With this release it will now become .jsx,.js,.tsx,.ts,.css,.json instead.

    Why even rewrite the implicit file extension order at all? One reason is because the .js file is more likely to behave correctly than the .ts file. The behavior of the .ts file may depend on tsconfig.json and the tsconfig.json file may not even be published, or may use extends to refer to a base tsconfig.json file that wasn't published. People can get into this situation when they forget to add all .ts files to their .npmignore file before publishing to npm. Picking .js over .ts helps make it more likely that resulting bundle will behave correctly.

v0.19.12

  • The "preserve" JSX mode now preserves JSX text verbatim (#3605)

    The JSX specification deliberately doesn't specify how JSX text is supposed to be interpreted and there is no canonical way to interpret JSX text. Two most popular interpretations are Babel and TypeScript. Yes they are different (esbuild deliberately follows TypeScript by the way).

    Previously esbuild normalized text to the TypeScript interpretation when the "preserve" JSX mode is active. However, "preserve" should arguably reproduce the original JSX text verbatim so that whatever JSX transform runs after esbuild is free to interpret it however it wants. So with this release, esbuild will now pass JSX text through unmodified:

    // Original code
    let el =
      <a href={'/'} title='&apos;&quot;'> some text
        {foo}
          more text </a>
    

    // Old output (with --loader=jsx --jsx=preserve)
    let el = <a href="/" title={'&quot;}> {" some text"} {foo} {"more text "} </a>;

    // New output (with --loader=jsx --jsx=preserve)
    let el = <a href={"/"} title='&apos;&quot;'> some text {foo} more text </a>;

  • Allow JSX elements as JSX attribute values

    JSX has an obscure feature where you can use JSX elements in attribute position without surrounding them with {...}. It looks like this:

... (truncated)
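
As a rough illustration of the `await esbuild.stop()` change described above, a minimal Deno test might now look like the sketch below. The pinned import URL and the test name are illustrative assumptions, not taken from the release notes; `esbuild.transform` and `esbuild.stop` are the esbuild APIs being demonstrated.

```ts
// Minimal Deno test against esbuild 0.20.x; run with: deno test -A
// The key difference from 0.19.x is that esbuild.stop() now returns a promise.
import * as esbuild from "https://deno.land/x/esbuild@v0.20.0/mod.js";

Deno.test("transform TypeScript with esbuild", async () => {
  const result = await esbuild.transform("let x: number = 1", { loader: "ts" });
  console.log(result.code); // e.g. "let x = 1;\n"

  // Previously esbuild.stop() was synchronous; with 0.20.0 it must be awaited,
  // otherwise Deno fails the test for leaking esbuild's child process.
  await esbuild.stop();
});
```

The notes above also recommend pinning the exact esbuild version in `package.json` (or using a patch-only range such as `~0.19.0`) so that breaking releases like 0.20.0 are not picked up automatically, which matches the exact pin used in `js/package.json` further below.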

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=esbuild&package-manager=npm_and_yarn&previous-version=0.19.2&new-version=0.20.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- js/package.json | 2 +- js/yarn.lock | 234 +++++++++++++++++++++++++----------------------- 2 files changed, 121 insertions(+), 115 deletions(-) diff --git a/js/package.json b/js/package.json index 57f9267afa3a8..f96764d82245e 100644 --- a/js/package.json +++ b/js/package.json @@ -79,7 +79,7 @@ "cross-env": "7.0.3", "del": "7.1.0", "del-cli": "5.1.0", - "esbuild": "0.19.2", + "esbuild": "0.20.0", "esbuild-plugin-alias": "0.2.1", "eslint": "8.52.0", "eslint-plugin-jest": "27.4.2", diff --git a/js/yarn.lock b/js/yarn.lock index 10d2a256e1cac..e7dead09bf8bb 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -416,225 +416,230 @@ resolved "https://registry.npmjs.org/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz#1d572bfbbe14b7704e0ba0f39b74815b84870d70" integrity sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw== +"@esbuild/aix-ppc64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.20.0.tgz#509621cca4e67caf0d18561a0c56f8b70237472f" + integrity sha512-fGFDEctNh0CcSwsiRPxiaqX0P5rq+AqE0SRhYGZ4PX46Lg1FNR6oCxJghf8YgY0WQEgQuh3lErUFE4KxLeRmmw== + "@esbuild/android-arm64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.17.19.tgz#bafb75234a5d3d1b690e7c2956a599345e84a2fd" integrity sha512-KBMWvEZooR7+kzY0BtbTQn0OAYY7CsiydT63pVEaPtVYF0hXbUaOyZog37DKxK7NF3XacBJOpYT4adIJh+avxA== -"@esbuild/android-arm64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.2.tgz#bc35990f412a749e948b792825eef7df0ce0e073" - integrity sha512-lsB65vAbe90I/Qe10OjkmrdxSX4UJDjosDgb8sZUKcg3oefEuW2OT2Vozz8ef7wrJbMcmhvCC+hciF8jY/uAkw== +"@esbuild/android-arm64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.20.0.tgz#109a6fdc4a2783fc26193d2687827045d8fef5ab" + integrity sha512-aVpnM4lURNkp0D3qPoAzSG92VXStYmoVPOgXveAUoQBWRSuQzt51yvSju29J6AHPmwY1BjH49uR29oyfH1ra8Q== "@esbuild/android-arm@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.17.19.tgz#5898f7832c2298bc7d0ab53701c57beb74d78b4d" integrity sha512-rIKddzqhmav7MSmoFCmDIb6e2W57geRsM94gV2l38fzhXMwq7hZoClug9USI2pFRGL06f4IOPHHpFNOkWieR8A== -"@esbuild/android-arm@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.2.tgz#edd1c8f23ba353c197f5b0337123c58ff2a56999" - integrity sha512-tM8yLeYVe7pRyAu9VMi/Q7aunpLwD139EY1S99xbQkT4/q2qa6eA4ige/WJQYdJ8GBL1K33pPFhPfPdJ/WzT8Q== +"@esbuild/android-arm@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.20.0.tgz#1397a2c54c476c4799f9b9073550ede496c94ba5" + integrity sha512-3bMAfInvByLHfJwYPJRlpTeaQA75n8C/QKpEaiS4HrFWFiJlNI0vzq/zCjBrhAYcPyVPG7Eo9dMrcQXuqmNk5g== "@esbuild/android-x64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.17.19.tgz#658368ef92067866d95fb268719f98f363d13ae1" integrity sha512-uUTTc4xGNDT7YSArp/zbtmbhO0uEEK9/ETW29Wk1thYUJBz3IVnvgEiEwEa9IeLyvnpKrWK64Utw2bgUmDveww== -"@esbuild/android-x64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.2.tgz#2dcdd6e6f1f2d82ea1b746abd8da5b284960f35a" - integrity 
sha512-qK/TpmHt2M/Hg82WXHRc/W/2SGo/l1thtDHZWqFq7oi24AjZ4O/CpPSu6ZuYKFkEgmZlFoa7CooAyYmuvnaG8w== +"@esbuild/android-x64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.20.0.tgz#2b615abefb50dc0a70ac313971102f4ce2fdb3ca" + integrity sha512-uK7wAnlRvjkCPzh8jJ+QejFyrP8ObKuR5cBIsQZ+qbMunwR8sbd8krmMbxTLSrDhiPZaJYKQAU5Y3iMDcZPhyQ== "@esbuild/darwin-arm64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.17.19.tgz#584c34c5991b95d4d48d333300b1a4e2ff7be276" integrity sha512-80wEoCfF/hFKM6WE1FyBHc9SfUblloAWx6FJkFWTWiCoht9Mc0ARGEM47e67W9rI09YoUxJL68WHfDRYEAvOhg== -"@esbuild/darwin-arm64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.2.tgz#55b36bc06d76f5c243987c1f93a11a80d8fc3b26" - integrity sha512-Ora8JokrvrzEPEpZO18ZYXkH4asCdc1DLdcVy8TGf5eWtPO1Ie4WroEJzwI52ZGtpODy3+m0a2yEX9l+KUn0tA== +"@esbuild/darwin-arm64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.20.0.tgz#5c122ed799eb0c35b9d571097f77254964c276a2" + integrity sha512-AjEcivGAlPs3UAcJedMa9qYg9eSfU6FnGHJjT8s346HSKkrcWlYezGE8VaO2xKfvvlZkgAhyvl06OJOxiMgOYQ== "@esbuild/darwin-x64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.17.19.tgz#7751d236dfe6ce136cce343dce69f52d76b7f6cb" integrity sha512-IJM4JJsLhRYr9xdtLytPLSH9k/oxR3boaUIYiHkAawtwNOXKE8KoU8tMvryogdcT8AU+Bflmh81Xn6Q0vTZbQw== -"@esbuild/darwin-x64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.2.tgz#982524af33a6424a3b5cb44bbd52559623ad719c" - integrity sha512-tP+B5UuIbbFMj2hQaUr6EALlHOIOmlLM2FK7jeFBobPy2ERdohI4Ka6ZFjZ1ZYsrHE/hZimGuU90jusRE0pwDw== +"@esbuild/darwin-x64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.20.0.tgz#9561d277002ba8caf1524f209de2b22e93d170c1" + integrity sha512-bsgTPoyYDnPv8ER0HqnJggXK6RyFy4PH4rtsId0V7Efa90u2+EifxytE9pZnsDgExgkARy24WUQGv9irVbTvIw== "@esbuild/freebsd-arm64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.17.19.tgz#cacd171665dd1d500f45c167d50c6b7e539d5fd2" integrity sha512-pBwbc7DufluUeGdjSU5Si+P3SoMF5DQ/F/UmTSb8HXO80ZEAJmrykPyzo1IfNbAoaqw48YRpv8shwd1NoI0jcQ== -"@esbuild/freebsd-arm64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.2.tgz#8e478a0856645265fe79eac4b31b52193011ee06" - integrity sha512-YbPY2kc0acfzL1VPVK6EnAlig4f+l8xmq36OZkU0jzBVHcOTyQDhnKQaLzZudNJQyymd9OqQezeaBgkTGdTGeQ== +"@esbuild/freebsd-arm64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.20.0.tgz#84178986a3138e8500d17cc380044868176dd821" + integrity sha512-kQ7jYdlKS335mpGbMW5tEe3IrQFIok9r84EM3PXB8qBFJPSc6dpWfrtsC/y1pyrz82xfUIn5ZrnSHQQsd6jebQ== "@esbuild/freebsd-x64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.17.19.tgz#0769456eee2a08b8d925d7c00b79e861cb3162e4" integrity sha512-4lu+n8Wk0XlajEhbEffdy2xy53dpR06SlzvhGByyg36qJw6Kpfk7cp45DR/62aPH9mtJRmIyrXAS5UWBrJT6TQ== -"@esbuild/freebsd-x64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.2.tgz#01b96604f2540db023c73809bb8ae6cd1692d6f3" - integrity sha512-nSO5uZT2clM6hosjWHAsS15hLrwCvIWx+b2e3lZ3MwbYSaXwvfO528OF+dLjas1g3bZonciivI8qKR/Hm7IWGw== 
+"@esbuild/freebsd-x64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.20.0.tgz#3f9ce53344af2f08d178551cd475629147324a83" + integrity sha512-uG8B0WSepMRsBNVXAQcHf9+Ko/Tr+XqmK7Ptel9HVmnykupXdS4J7ovSQUIi0tQGIndhbqWLaIL/qO/cWhXKyQ== "@esbuild/linux-arm64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.17.19.tgz#38e162ecb723862c6be1c27d6389f48960b68edb" integrity sha512-ct1Tg3WGwd3P+oZYqic+YZF4snNl2bsnMKRkb3ozHmnM0dGWuxcPTTntAF6bOP0Sp4x0PjSF+4uHQ1xvxfRKqg== -"@esbuild/linux-arm64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.2.tgz#7e5d2c7864c5c83ec789b59c77cd9c20d2594916" - integrity sha512-ig2P7GeG//zWlU0AggA3pV1h5gdix0MA3wgB+NsnBXViwiGgY77fuN9Wr5uoCrs2YzaYfogXgsWZbm+HGr09xg== +"@esbuild/linux-arm64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.20.0.tgz#24efa685515689df4ecbc13031fa0a9dda910a11" + integrity sha512-uTtyYAP5veqi2z9b6Gr0NUoNv9F/rOzI8tOD5jKcCvRUn7T60Bb+42NDBCWNhMjkQzI0qqwXkQGo1SY41G52nw== "@esbuild/linux-arm@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.17.19.tgz#1a2cd399c50040184a805174a6d89097d9d1559a" integrity sha512-cdmT3KxjlOQ/gZ2cjfrQOtmhG4HJs6hhvm3mWSRDPtZ/lP5oe8FWceS10JaSJC13GBd4eH/haHnqf7hhGNLerA== -"@esbuild/linux-arm@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.2.tgz#c32ae97bc0246664a1cfbdb4a98e7b006d7db8ae" - integrity sha512-Odalh8hICg7SOD7XCj0YLpYCEc+6mkoq63UnExDCiRA2wXEmGlK5JVrW50vZR9Qz4qkvqnHcpH+OFEggO3PgTg== +"@esbuild/linux-arm@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.20.0.tgz#6b586a488e02e9b073a75a957f2952b3b6e87b4c" + integrity sha512-2ezuhdiZw8vuHf1HKSf4TIk80naTbP9At7sOqZmdVwvvMyuoDiZB49YZKLsLOfKIr77+I40dWpHVeY5JHpIEIg== "@esbuild/linux-ia32@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.17.19.tgz#e28c25266b036ce1cabca3c30155222841dc035a" integrity sha512-w4IRhSy1VbsNxHRQpeGCHEmibqdTUx61Vc38APcsRbuVgK0OPEnQ0YD39Brymn96mOx48Y2laBQGqgZ0j9w6SQ== -"@esbuild/linux-ia32@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.2.tgz#3fc4f0fa026057fe885e4a180b3956e704f1ceaa" - integrity sha512-mLfp0ziRPOLSTek0Gd9T5B8AtzKAkoZE70fneiiyPlSnUKKI4lp+mGEnQXcQEHLJAcIYDPSyBvsUbKUG2ri/XQ== +"@esbuild/linux-ia32@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.20.0.tgz#84ce7864f762708dcebc1b123898a397dea13624" + integrity sha512-c88wwtfs8tTffPaoJ+SQn3y+lKtgTzyjkD8NgsyCtCmtoIC8RDL7PrJU05an/e9VuAke6eJqGkoMhJK1RY6z4w== "@esbuild/linux-loong64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.17.19.tgz#0f887b8bb3f90658d1a0117283e55dbd4c9dcf72" integrity sha512-2iAngUbBPMq439a+z//gE+9WBldoMp1s5GWsUSgqHLzLJ9WoZLZhpwWuym0u0u/4XmZ3gpHmzV84PonE+9IIdQ== -"@esbuild/linux-loong64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.2.tgz#633bcaea443f3505fb0ed109ab840c99ad3451a4" - integrity sha512-hn28+JNDTxxCpnYjdDYVMNTR3SKavyLlCHHkufHV91fkewpIyQchS1d8wSbmXhs1fiYDpNww8KTFlJ1dHsxeSw== +"@esbuild/linux-loong64@0.20.0": + version "0.20.0" + resolved 
"https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.20.0.tgz#1922f571f4cae1958e3ad29439c563f7d4fd9037" + integrity sha512-lR2rr/128/6svngnVta6JN4gxSXle/yZEZL3o4XZ6esOqhyR4wsKyfu6qXAL04S4S5CgGfG+GYZnjFd4YiG3Aw== "@esbuild/linux-mips64el@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.17.19.tgz#f5d2a0b8047ea9a5d9f592a178ea054053a70289" integrity sha512-LKJltc4LVdMKHsrFe4MGNPp0hqDFA1Wpt3jE1gEyM3nKUvOiO//9PheZZHfYRfYl6AwdTH4aTcXSqBerX0ml4A== -"@esbuild/linux-mips64el@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.2.tgz#e0bff2898c46f52be7d4dbbcca8b887890805823" - integrity sha512-KbXaC0Sejt7vD2fEgPoIKb6nxkfYW9OmFUK9XQE4//PvGIxNIfPk1NmlHmMg6f25x57rpmEFrn1OotASYIAaTg== +"@esbuild/linux-mips64el@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.20.0.tgz#7ca1bd9df3f874d18dbf46af009aebdb881188fe" + integrity sha512-9Sycc+1uUsDnJCelDf6ZNqgZQoK1mJvFtqf2MUz4ujTxGhvCWw+4chYfDLPepMEvVL9PDwn6HrXad5yOrNzIsQ== "@esbuild/linux-ppc64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.17.19.tgz#876590e3acbd9fa7f57a2c7d86f83717dbbac8c7" integrity sha512-/c/DGybs95WXNS8y3Ti/ytqETiW7EU44MEKuCAcpPto3YjQbyK3IQVKfF6nbghD7EcLUGl0NbiL5Rt5DMhn5tg== -"@esbuild/linux-ppc64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.2.tgz#d75798da391f54a9674f8c143b9a52d1dbfbfdde" - integrity sha512-dJ0kE8KTqbiHtA3Fc/zn7lCd7pqVr4JcT0JqOnbj4LLzYnp+7h8Qi4yjfq42ZlHfhOCM42rBh0EwHYLL6LEzcw== +"@esbuild/linux-ppc64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.20.0.tgz#8f95baf05f9486343bceeb683703875d698708a4" + integrity sha512-CoWSaaAXOZd+CjbUTdXIJE/t7Oz+4g90A3VBCHLbfuc5yUQU/nFDLOzQsN0cdxgXd97lYW/psIIBdjzQIwTBGw== "@esbuild/linux-riscv64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.17.19.tgz#7f49373df463cd9f41dc34f9b2262d771688bf09" integrity sha512-FC3nUAWhvFoutlhAkgHf8f5HwFWUL6bYdvLc/TTuxKlvLi3+pPzdZiFKSWz/PF30TB1K19SuCxDTI5KcqASJqA== -"@esbuild/linux-riscv64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.2.tgz#012409bd489ed1bb9b775541d4a46c5ded8e6dd8" - integrity sha512-7Z/jKNFufZ/bbu4INqqCN6DDlrmOTmdw6D0gH+6Y7auok2r02Ur661qPuXidPOJ+FSgbEeQnnAGgsVynfLuOEw== +"@esbuild/linux-riscv64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.20.0.tgz#ca63b921d5fe315e28610deb0c195e79b1a262ca" + integrity sha512-mlb1hg/eYRJUpv8h/x+4ShgoNLL8wgZ64SUr26KwglTYnwAWjkhR2GpoKftDbPOCnodA9t4Y/b68H4J9XmmPzA== "@esbuild/linux-s390x@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.17.19.tgz#e2afd1afcaf63afe2c7d9ceacd28ec57c77f8829" integrity sha512-IbFsFbxMWLuKEbH+7sTkKzL6NJmG2vRyy6K7JJo55w+8xDk7RElYn6xvXtDW8HCfoKBFK69f3pgBJSUSQPr+4Q== -"@esbuild/linux-s390x@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.2.tgz#ece3ed75c5a150de8a5c110f02e97d315761626b" - integrity sha512-U+RinR6aXXABFCcAY4gSlv4CL1oOVvSSCdseQmGO66H+XyuQGZIUdhG56SZaDJQcLmrSfRmx5XZOWyCJPRqS7g== +"@esbuild/linux-s390x@0.20.0": + version "0.20.0" + resolved 
"https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.20.0.tgz#cb3d069f47dc202f785c997175f2307531371ef8" + integrity sha512-fgf9ubb53xSnOBqyvWEY6ukBNRl1mVX1srPNu06B6mNsNK20JfH6xV6jECzrQ69/VMiTLvHMicQR/PgTOgqJUQ== "@esbuild/linux-x64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.17.19.tgz#8a0e9738b1635f0c53389e515ae83826dec22aa4" integrity sha512-68ngA9lg2H6zkZcyp22tsVt38mlhWde8l3eJLWkyLrp4HwMUr3c1s/M2t7+kHIhvMjglIBrFpncX1SzMckomGw== -"@esbuild/linux-x64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.2.tgz#dea187019741602d57aaf189a80abba261fbd2aa" - integrity sha512-oxzHTEv6VPm3XXNaHPyUTTte+3wGv7qVQtqaZCrgstI16gCuhNOtBXLEBkBREP57YTd68P0VgDgG73jSD8bwXQ== +"@esbuild/linux-x64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.20.0.tgz#ac617e0dc14e9758d3d7efd70288c14122557dc7" + integrity sha512-H9Eu6MGse++204XZcYsse1yFHmRXEWgadk2N58O/xd50P9EvFMLJTQLg+lB4E1cF2xhLZU5luSWtGTb0l9UeSg== "@esbuild/netbsd-x64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.17.19.tgz#c29fb2453c6b7ddef9a35e2c18b37bda1ae5c462" integrity sha512-CwFq42rXCR8TYIjIfpXCbRX0rp1jo6cPIUPSaWwzbVI4aOfX96OXY8M6KNmtPcg7QjYeDmN+DD0Wp3LaBOLf4Q== -"@esbuild/netbsd-x64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.2.tgz#bbfd7cf9ab236a23ee3a41b26f0628c57623d92a" - integrity sha512-WNa5zZk1XpTTwMDompZmvQLHszDDDN7lYjEHCUmAGB83Bgs20EMs7ICD+oKeT6xt4phV4NDdSi/8OfjPbSbZfQ== +"@esbuild/netbsd-x64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.20.0.tgz#6cc778567f1513da6e08060e0aeb41f82eb0f53c" + integrity sha512-lCT675rTN1v8Fo+RGrE5KjSnfY0x9Og4RN7t7lVrN3vMSjy34/+3na0q7RIfWDAj0e0rCh0OL+P88lu3Rt21MQ== "@esbuild/openbsd-x64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.17.19.tgz#95e75a391403cb10297280d524d66ce04c920691" integrity sha512-cnq5brJYrSZ2CF6c35eCmviIN3k3RczmHz8eYaVlNasVqsNY+JKohZU5MKmaOI+KkllCdzOKKdPs762VCPC20g== -"@esbuild/openbsd-x64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.2.tgz#fa5c4c6ee52a360618f00053652e2902e1d7b4a7" - integrity sha512-S6kI1aT3S++Dedb7vxIuUOb3oAxqxk2Rh5rOXOTYnzN8JzW1VzBd+IqPiSpgitu45042SYD3HCoEyhLKQcDFDw== +"@esbuild/openbsd-x64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.20.0.tgz#76848bcf76b4372574fb4d06cd0ed1fb29ec0fbe" + integrity sha512-HKoUGXz/TOVXKQ+67NhxyHv+aDSZf44QpWLa3I1lLvAwGq8x1k0T+e2HHSRvxWhfJrFxaaqre1+YyzQ99KixoA== "@esbuild/sunos-x64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.17.19.tgz#722eaf057b83c2575937d3ffe5aeb16540da7273" integrity sha512-vCRT7yP3zX+bKWFeP/zdS6SqdWB8OIpaRq/mbXQxTGHnIxspRtigpkUcDMlSCOejlHowLqII7K2JKevwyRP2rg== -"@esbuild/sunos-x64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.2.tgz#52a2ac8ac6284c02d25df22bb4cfde26fbddd68d" - integrity sha512-VXSSMsmb+Z8LbsQGcBMiM+fYObDNRm8p7tkUDMPG/g4fhFX5DEFmjxIEa3N8Zr96SjsJ1woAhF0DUnS3MF3ARw== +"@esbuild/sunos-x64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.20.0.tgz#ea4cd0639bf294ad51bc08ffbb2dac297e9b4706" + integrity 
sha512-GDwAqgHQm1mVoPppGsoq4WJwT3vhnz/2N62CzhvApFD1eJyTroob30FPpOZabN+FgCjhG+AgcZyOPIkR8dfD7g== "@esbuild/win32-arm64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.17.19.tgz#9aa9dc074399288bdcdd283443e9aeb6b9552b6f" integrity sha512-yYx+8jwowUstVdorcMdNlzklLYhPxjniHWFKgRqH7IFlUEa0Umu3KuYplf1HUZZ422e3NU9F4LGb+4O0Kdcaag== -"@esbuild/win32-arm64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.2.tgz#719ed5870855de8537aef8149694a97d03486804" - integrity sha512-5NayUlSAyb5PQYFAU9x3bHdsqB88RC3aM9lKDAz4X1mo/EchMIT1Q+pSeBXNgkfNmRecLXA0O8xP+x8V+g/LKg== +"@esbuild/win32-arm64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.20.0.tgz#a5c171e4a7f7e4e8be0e9947a65812c1535a7cf0" + integrity sha512-0vYsP8aC4TvMlOQYozoksiaxjlvUcQrac+muDqj1Fxy6jh9l9CZJzj7zmh8JGfiV49cYLTorFLxg7593pGldwQ== "@esbuild/win32-ia32@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.17.19.tgz#95ad43c62ad62485e210f6299c7b2571e48d2b03" integrity sha512-eggDKanJszUtCdlVs0RB+h35wNlb5v4TWEkq4vZcmVt5u/HiDZrTXe2bWFQUez3RgNHwx/x4sk5++4NSSicKkw== -"@esbuild/win32-ia32@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.2.tgz#24832223880b0f581962c8660f8fb8797a1e046a" - integrity sha512-47gL/ek1v36iN0wL9L4Q2MFdujR0poLZMJwhO2/N3gA89jgHp4MR8DKCmwYtGNksbfJb9JoTtbkoe6sDhg2QTA== +"@esbuild/win32-ia32@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.20.0.tgz#f8ac5650c412d33ea62d7551e0caf82da52b7f85" + integrity sha512-p98u4rIgfh4gdpV00IqknBD5pC84LCub+4a3MO+zjqvU5MVXOc3hqR2UgT2jI2nh3h8s9EQxmOsVI3tyzv1iFg== "@esbuild/win32-x64@0.17.19": version "0.17.19" resolved "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.17.19.tgz#8cfaf2ff603e9aabb910e9c0558c26cf32744061" integrity sha512-lAhycmKnVOuRYNtRtatQR1LPQf2oYCkRGkSFnseDAKPl8lu5SOsK/e1sXe5a0Pc5kHIHe6P2I/ilntNv2xf3cA== -"@esbuild/win32-x64@0.19.2": - version "0.19.2" - resolved "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.2.tgz#1205014625790c7ff0e471644a878a65d1e34ab0" - integrity sha512-tcuhV7ncXBqbt/Ybf0IyrMcwVOAPDckMK9rXNHtF17UTK18OKLpg08glminN06pt2WCoALhXdLfSPbVvK/6fxw== +"@esbuild/win32-x64@0.20.0": + version "0.20.0" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.20.0.tgz#2efddf82828aac85e64cef62482af61c29561bee" + integrity sha512-NgJnesu1RtWihtTtXGFMU5YSE6JyyHPMxCwBZK7a6/8d31GuSo9l0Ss7w1Jw5QnKUawG6UEehs883kcXf5fYwg== "@eslint-community/eslint-utils@^4.2.0", "@eslint-community/eslint-utils@^4.4.0": version "4.4.0" @@ -2888,33 +2893,34 @@ esbuild-plugin-alias@0.2.1: resolved "https://registry.npmjs.org/esbuild-plugin-alias/-/esbuild-plugin-alias-0.2.1.tgz#45a86cb941e20e7c2bc68a2bea53562172494fcb" integrity sha512-jyfL/pwPqaFXyKnj8lP8iLk6Z0m099uXR45aSN8Av1XD4vhvQutxxPzgA2bTcAwQpa1zCXDcWOlhFgyP3GKqhQ== -esbuild@0.19.2: - version "0.19.2" - resolved "https://registry.npmjs.org/esbuild/-/esbuild-0.19.2.tgz#b1541828a89dfb6f840d38538767c6130dca2aac" - integrity sha512-G6hPax8UbFakEj3hWO0Vs52LQ8k3lnBhxZWomUJDxfz3rZTLqF5k/FCzuNdLx2RbpBiQQF9H9onlDDH1lZsnjg== +esbuild@0.20.0: + version "0.20.0" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.20.0.tgz#a7170b63447286cd2ff1f01579f09970e6965da4" + integrity sha512-6iwE3Y2RVYCME1jLpBqq7LQWK3MW6vjV2bZy6gt/WrqkY+WE74Spyc0ThAOYpMtITvnjX09CrC6ym7A/m9mebA== 
optionalDependencies: - "@esbuild/android-arm" "0.19.2" - "@esbuild/android-arm64" "0.19.2" - "@esbuild/android-x64" "0.19.2" - "@esbuild/darwin-arm64" "0.19.2" - "@esbuild/darwin-x64" "0.19.2" - "@esbuild/freebsd-arm64" "0.19.2" - "@esbuild/freebsd-x64" "0.19.2" - "@esbuild/linux-arm" "0.19.2" - "@esbuild/linux-arm64" "0.19.2" - "@esbuild/linux-ia32" "0.19.2" - "@esbuild/linux-loong64" "0.19.2" - "@esbuild/linux-mips64el" "0.19.2" - "@esbuild/linux-ppc64" "0.19.2" - "@esbuild/linux-riscv64" "0.19.2" - "@esbuild/linux-s390x" "0.19.2" - "@esbuild/linux-x64" "0.19.2" - "@esbuild/netbsd-x64" "0.19.2" - "@esbuild/openbsd-x64" "0.19.2" - "@esbuild/sunos-x64" "0.19.2" - "@esbuild/win32-arm64" "0.19.2" - "@esbuild/win32-ia32" "0.19.2" - "@esbuild/win32-x64" "0.19.2" + "@esbuild/aix-ppc64" "0.20.0" + "@esbuild/android-arm" "0.20.0" + "@esbuild/android-arm64" "0.20.0" + "@esbuild/android-x64" "0.20.0" + "@esbuild/darwin-arm64" "0.20.0" + "@esbuild/darwin-x64" "0.20.0" + "@esbuild/freebsd-arm64" "0.20.0" + "@esbuild/freebsd-x64" "0.20.0" + "@esbuild/linux-arm" "0.20.0" + "@esbuild/linux-arm64" "0.20.0" + "@esbuild/linux-ia32" "0.20.0" + "@esbuild/linux-loong64" "0.20.0" + "@esbuild/linux-mips64el" "0.20.0" + "@esbuild/linux-ppc64" "0.20.0" + "@esbuild/linux-riscv64" "0.20.0" + "@esbuild/linux-s390x" "0.20.0" + "@esbuild/linux-x64" "0.20.0" + "@esbuild/netbsd-x64" "0.20.0" + "@esbuild/openbsd-x64" "0.20.0" + "@esbuild/sunos-x64" "0.20.0" + "@esbuild/win32-arm64" "0.20.0" + "@esbuild/win32-ia32" "0.20.0" + "@esbuild/win32-x64" "0.20.0" esbuild@^0.17.11: version "0.17.19" From a88e9f62f371e87ac34a29305dc87a82d227ff30 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 13:34:14 -0500 Subject: [PATCH 24/74] MINOR: [JS] Bump regenerator-runtime from 0.14.0 to 0.14.1 in /js (#39889) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [regenerator-runtime](https://github.com/facebook/regenerator) from 0.14.0 to 0.14.1.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- js/package.json | 2 +- js/yarn.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/js/package.json b/js/package.json index f96764d82245e..bb70fd0a395b0 100644 --- a/js/package.json +++ b/js/package.json @@ -102,7 +102,7 @@ "memfs": "4.5.0", "mkdirp": "3.0.1", "multistream": "4.1.0", - "regenerator-runtime": "0.14.0", + "regenerator-runtime": "0.14.1", "rollup": "4.3.0", "rxjs": "7.8.1", "ts-jest": "29.1.1", diff --git a/js/yarn.lock b/js/yarn.lock index e7dead09bf8bb..7b3180740d3da 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -5967,10 +5967,10 @@ redent@^4.0.0: indent-string "^5.0.0" strip-indent "^4.0.0" -regenerator-runtime@0.14.0: - version "0.14.0" - resolved "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.0.tgz#5e19d68eb12d486f797e15a3c6a918f7cec5eb45" - integrity sha512-srw17NI0TUWHuGa5CFGGmhfNIeja30WMBfbslPNhf6JrqQlLN5gcrvig1oqPxiVaXb0oW0XRKtH6Nngs5lKCIA== +regenerator-runtime@0.14.1: + version "0.14.1" + resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f" + integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw== regex-not@^1.0.0, regex-not@^1.0.2: version "1.0.2" From 796b0cc0ad0509502f5419d379225e6168e2bb06 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 2 Feb 2024 16:49:59 +0900 Subject: [PATCH 25/74] GH-39872: [Packaging][Ubuntu] Add support for Ubuntu 24.04 Noble Numbat (#39887) ### Rationale for this change Ubuntu 24.04 isn't released yet but it seems that Docker image is already available. ### What changes are included in this PR? Add jobs for Ubuntu 24.04. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. 
* Closes: #39872 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/release/binary-task.rb | 7 +- dev/release/verify-release-candidate.sh | 4 +- .../apt/ubuntu-noble/Dockerfile | 41 +++++++++ .../apache-arrow/apt/ubuntu-noble-arm64/from | 18 ++++ .../apache-arrow/apt/ubuntu-noble/Dockerfile | 85 +++++++++++++++++++ dev/tasks/linux-packages/package-task.rb | 2 + dev/tasks/tasks.yml | 3 +- 7 files changed, 156 insertions(+), 4 deletions(-) create mode 100644 dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-noble/Dockerfile create mode 100644 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble-arm64/from create mode 100644 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index df6c0778dc805..0c1b98ab32c95 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1089,6 +1089,7 @@ def available_apt_targets ["ubuntu", "focal", "main"], ["ubuntu", "jammy", "main"], ["ubuntu", "mantic", "main"], + ["ubuntu", "noble", "main"], ] end @@ -2121,8 +2122,10 @@ def apt_test_targets_default # "ubuntu-focal-arm64", "ubuntu-jammy", # "ubuntu-jammy-arm64", - "ubuntu-lunar", - # "ubuntu-lunar-arm64", + "ubuntu-mantic", + # "ubuntu-mantic-arm64", + "ubuntu-noble", + # "ubuntu-noble-arm64", ] end diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 04fc7fd563f65..a61b5ba094c8a 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -196,7 +196,9 @@ test_apt() { "ubuntu:jammy" \ "arm64v8/ubuntu:jammy" \ "ubuntu:mantic" \ - "arm64v8/ubuntu:mantic"; do \ + "arm64v8/ubuntu:mantic" \ + "ubuntu:noble" \ + "arm64v8/ubuntu:noble"; do \ case "${target}" in arm64v8/*) if [ "$(arch)" = "aarch64" -o -e /usr/bin/qemu-aarch64-static ]; then diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-noble/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-noble/Dockerfile new file mode 100644 index 0000000000000..0e37ee94bb0a3 --- /dev/null +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-noble/Dockerfile @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM ubuntu:noble + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG + +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + debhelper \ + devscripts \ + fakeroot \ + gnupg \ + lsb-release && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble-arm64/from b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble-arm64/from new file mode 100644 index 0000000000000..4414c353871c6 --- /dev/null +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble-arm64/from @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arm64v8/ubuntu:noble diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile new file mode 100644 index 0000000000000..33f2d9a35371b --- /dev/null +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile @@ -0,0 +1,85 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG FROM=ubuntu:noble +FROM ${FROM} + +RUN \ + echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN \ + echo 'APT::Install-Recommends "false";' > \ + /etc/apt/apt.conf.d/disable-install-recommends + +ARG DEBUG +RUN \ + quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ + apt update ${quiet} && \ + apt install -y -V ${quiet} \ + build-essential \ + clang \ + clang-tools \ + cmake \ + debhelper \ + devscripts \ + git \ + gtk-doc-tools \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgirepository1.0-dev \ + libglib2.0-doc \ + libgmock-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + libgtest-dev \ + liblz4-dev \ + libmlir-15-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + llvm-dev \ + lsb-release \ + meson \ + mlir-15-tools \ + ninja-build \ + nlohmann-json3-dev \ + pkg-config \ + protobuf-compiler-grpc \ + python3-dev \ + python3-pip \ + python3-setuptools \ + rapidjson-dev \ + tzdata \ + valac \ + zlib1g-dev && \ + if apt list | grep -q '^libcuda'; then \ + apt install -y -V ${quiet} nvidia-cuda-toolkit; \ + else \ + :; \ + fi && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/dev/tasks/linux-packages/package-task.rb b/dev/tasks/linux-packages/package-task.rb index ecd61054daeb1..51fe0b9a75b0c 100644 --- a/dev/tasks/linux-packages/package-task.rb +++ b/dev/tasks/linux-packages/package-task.rb @@ -279,6 +279,8 @@ def apt_targets_default # "ubuntu-jammy-arm64", "ubuntu-mantic", # "ubuntu-mantic-arm64", + "ubuntu-noble", + # "ubuntu-noble-arm64", ] end diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 6c59364d51a50..0f8c58391fa66 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -465,7 +465,8 @@ tasks: "debian-trixie", "ubuntu-focal", "ubuntu-jammy", - "ubuntu-mantic"] %} + "ubuntu-mantic", + "ubuntu-noble"] %} {% for architecture in ["amd64", "arm64"] %} {{ target }}-{{ architecture }}: ci: github From 129a5291a26e2baa91d98d1910cb2128854e6b60 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 2 Feb 2024 11:26:57 +0100 Subject: [PATCH 26/74] GH-39788: [Python] Validate max_chunksize in Table.to_batches (#39796) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Validating the keyword to be strictly positive, to avoid an infinite loop. 
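As a usage illustration (not part of the patch itself), here is a minimal pyarrow sketch of the behaviour this change enforces, assuming a build that includes the fix; the error message is the one added in `table.pxi` below:

```python
import pyarrow as pa

table = pa.table({"x": [1, 2, 3, 4]})

# A strictly positive max_chunksize still splits the table as before.
batches = table.to_batches(max_chunksize=2)
assert all(b.num_rows <= 2 for b in batches)

# A non-positive value now raises instead of looping forever.
try:
    table.to_batches(max_chunksize=0)
except ValueError as exc:
    print(exc)  # 'max_chunksize' should be strictly positive
```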
* Closes: #39788 Authored-by: Joris Van den Bossche Signed-off-by: Raúl Cumplido --- python/pyarrow/table.pxi | 2 ++ python/pyarrow/tests/test_table.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 3c450d61a7659..abda784fb7c18 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -4172,6 +4172,8 @@ cdef class Table(_Tabular): reader.reset(new TableBatchReader(deref(self.table))) if max_chunksize is not None: + if not max_chunksize > 0: + raise ValueError("'max_chunksize' should be strictly positive") c_max_chunksize = max_chunksize reader.get().set_chunksize(c_max_chunksize) diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index ff38c614c251f..d6def54570581 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -1089,6 +1089,9 @@ def test_table_to_batches(): table_from_iter = pa.Table.from_batches(iter([batch1, batch2, batch1])) assert table.equals(table_from_iter) + with pytest.raises(ValueError): + table.to_batches(max_chunksize=0) + def test_table_basics(): data = [ From 90b30fcbfdfe12fa9ed497c3fa1cfe682b50168f Mon Sep 17 00:00:00 2001 From: Lyndon Shi <9373058+lynshi@users.noreply.github.com> Date: Fri, 2 Feb 2024 07:15:57 -0800 Subject: [PATCH 27/74] MINOR: [C++][Docs] Fix MapBuilder docstring (#39755) The [current `MapBuilder` documentation](https://arrow.apache.org/docs/cpp/api/builder.html#_CPPv4N5arrow10MapBuilderE) says: > To use this class, you must append values to the key and item array builders and use the Append function to delimit each distinct map (once the keys and items have been appended) This contradicts the [docstring for `Append`](https://arrow.apache.org/docs/cpp/api/builder.html#_CPPv4N5arrow10MapBuilder6AppendEv): > This function should be called before beginning to append elements to the key and item builders The `Append` documentation is correct; it should be called *before* keys and items have been appended. If `Append` is called after, as the `MapBuilder` docstring suggests, `Finish` results in an empty `Array`. ### What changes are included in this PR? Documentation only change. ### Are these changes tested? There are no behavior changes. ### Are there any user-facing changes? No Authored-by: Lyndon Shi <9373058+lynshi@users.noreply.github.com> Signed-off-by: Benjamin Kietzman --- cpp/src/arrow/array/builder_nested.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 8065752f3e278..429aa5c0488cd 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -515,10 +515,9 @@ class ARROW_EXPORT LargeListViewBuilder final /// \class MapBuilder /// \brief Builder class for arrays of variable-size maps /// -/// To use this class, you must append values to the key and item array builders -/// and use the Append function to delimit each distinct map (once the keys and items -/// have been appended) or use the bulk API to append a sequence of offsets and null -/// maps. +/// To use this class, you must use the Append function to delimit each distinct +/// map before appending values to the key and item array builders, or use the +/// bulk API to append a sequence of offsets and null maps. /// /// Key uniqueness and ordering are not validated. 
class ARROW_EXPORT MapBuilder : public ArrayBuilder { From 32bd01fa64b275937ca90aa50b11f275eeefde94 Mon Sep 17 00:00:00 2001 From: mwish Date: Sat, 3 Feb 2024 03:54:47 +0800 Subject: [PATCH 28/74] GH-39843: [C++][Parquet] Parquet binary length overflow exception should contain the length of binary (#39844) ### Rationale for this change See https://github.com/apache/arrow/issues/39843 It will be great to contain a string length in decoder. ### What changes are included in this PR? change the logging of encoding ### Are these changes tested? no ### Are there any user-facing changes? more specific error logging? * Closes: #39843 Authored-by: mwish Signed-off-by: mwish --- cpp/src/parquet/column_writer.cc | 3 ++- cpp/src/parquet/encoding.cc | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 23366b2daafd5..eae8fc6125499 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -442,7 +442,8 @@ class SerializedPageWriter : public PageWriter { if (offset_index_builder_ != nullptr) { const int64_t compressed_size = output_data_len + header_size; if (compressed_size > std::numeric_limits::max()) { - throw ParquetException("Compressed page size overflows INT32_MAX."); + throw ParquetException("Compressed page size ", compressed_size, + " overflows INT32_MAX."); } if (!page.first_row_index().has_value()) { throw ParquetException("First row index is not set in data page."); diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 5573f5b9aed4c..a3d1746536647 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -160,7 +160,8 @@ class PlainEncoder : public EncoderImpl, virtual public TypedEncoder { *array.data(), [&](::std::string_view view) { if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) { - return Status::Invalid("Parquet cannot store strings with size 2GB or more"); + return Status::Invalid( + "Parquet cannot store strings with size 2GB or more, got: ", view.size()); } UnsafePutByteArray(view.data(), static_cast(view.size())); return Status::OK(); @@ -571,7 +572,8 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder { *array.data(), [&](::std::string_view view) { if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) { - return Status::Invalid("Parquet cannot store strings with size 2GB or more"); + return Status::Invalid( + "Parquet cannot store strings with size 2GB or more, got: ", view.size()); } PutByteArray(view.data(), static_cast(view.size())); return Status::OK(); @@ -585,7 +587,8 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder { for (int64_t i = 0; i < array.length(); i++) { auto v = array.GetView(i); if (ARROW_PREDICT_FALSE(v.size() > kMaxByteArraySize)) { - throw ParquetException("Parquet cannot store strings with size 2GB or more"); + throw ParquetException( + "Parquet cannot store strings with size 2GB or more, got: ", v.size()); } dict_encoded_size_ += static_cast(v.size() + sizeof(uint32_t)); int32_t unused_memo_index; @@ -2671,7 +2674,8 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, *array.data(), [&](::std::string_view view) { if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) { - return Status::Invalid("Parquet cannot store strings with size 2GB or more"); + return Status::Invalid( + "Parquet cannot store strings with size 2GB or more, got: ", view.size()); } length_encoder_.Put({static_cast(view.length())}, 1); 
PARQUET_THROW_NOT_OK(sink_.Append(view.data(), view.length())); @@ -3200,7 +3204,8 @@ class DeltaByteArrayEncoder : public EncoderImpl, virtual public TypedEncoder
= kMaxByteArraySize)) { - return Status::Invalid("Parquet cannot store strings with size 2GB or more"); + return Status::Invalid( + "Parquet cannot store strings with size 2GB or more, got: ", view.size()); } const ByteArray src{view}; @@ -3246,7 +3251,8 @@ struct ByteArrayVisitor { std::string_view operator[](int i) const { if (ARROW_PREDICT_FALSE(src[i].len >= kMaxByteArraySize)) { - throw ParquetException("Parquet cannot store strings with size 2GB or more"); + throw ParquetException("Parquet cannot store strings with size 2GB or more, got: ", + src[i].len); } return std::string_view{src[i]}; } From 0fb00fdea7a9541ac8df8a4f784af1dfd0adb056 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Sat, 3 Feb 2024 01:45:40 +0530 Subject: [PATCH 29/74] GH-39734: [Java] Bump org.codehaus.mojo:exec-maven-plugin from 1.6.0 to 3.1.1 (#39696) ### Rationale for this change This PR was created to replace https://github.com/apache/arrow/pull/39374 and do the necessary changes for `org.codehaus.mojo` upgrade to take place. ### What changes are included in this PR? The changes to the `org.codehaus.mojo` version and an upgrade on the maven version used in the `.env`. ### Are these changes tested? Tested locally, but this requires a CI verification on Java. ### Are there any user-facing changes? No Authored-by: vibhatha Signed-off-by: David Li --- .env | 2 +- docker-compose.yml | 12 +++--------- java/performance/pom.xml | 2 +- java/pom.xml | 6 +++--- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/.env b/.env index 6746892fd4ed8..427a4ab0bf398 100644 --- a/.env +++ b/.env @@ -65,7 +65,7 @@ JDK=8 KARTOTHEK=latest # LLVM 12 and GCC 11 reports -Wmismatched-new-delete. LLVM=14 -MAVEN=3.5.4 +MAVEN=3.6.3 NODE=18 NUMBA=latest NUMPY=latest diff --git a/docker-compose.yml b/docker-compose.yml index a08345c198fa0..0252c4ec8a896 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1709,9 +1709,7 @@ services: arch: ${ARCH} # Use a newer JDK as it seems to improve stability jdk: 17 - # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should - # be set to ${MAVEN} - maven: 3.5 + maven: ${MAVEN} node: ${NODE} go: ${GO} volumes: *conda-volumes @@ -1843,9 +1841,7 @@ services: arch: ${ARCH} python: ${PYTHON} jdk: ${JDK} - # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should - # be set to ${MAVEN} - maven: 3.5 + maven: ${MAVEN} hdfs: ${HDFS} links: - impala:impala @@ -1886,9 +1882,7 @@ services: arch: ${ARCH} python: ${PYTHON} jdk: ${JDK} - # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should - # be set to ${MAVEN} - maven: 3.5 + maven: ${MAVEN} spark: ${SPARK} numpy: ${NUMPY} shm_size: *shm-size diff --git a/java/performance/pom.xml b/java/performance/pom.xml index a1d53171f549b..ba5a6616dca77 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -139,7 +139,7 @@ org.codehaus.mojo exec-maven-plugin - 1.6.0 + 3.1.1 run-java-benchmarks diff --git a/java/pom.xml b/java/pom.xml index 3e595648ed085..7871303634976 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1038,7 +1038,7 @@ org.codehaus.mojo exec-maven-plugin - 3.1.0 + 3.1.1 cdata-cmake @@ -1099,7 +1099,7 @@ org.codehaus.mojo exec-maven-plugin - 3.1.0 + 3.1.1 jni-cpp-cmake @@ -1214,7 +1214,7 @@ org.codehaus.mojo exec-maven-plugin - 3.1.0 + 3.1.1 jni-cpp-cmake From 22f2cfd1e1ebe49016b6d97c49f494287a98d02f Mon Sep 17 00:00:00 2001 From: Divyansh200102 <146909065+Divyansh200102@users.noreply.github.com> Date: Sat, 3 Feb 2024 16:29:49 +0530 Subject: [PATCH 30/74] GH-39416: 
[GLib][Docs] Fixed Broken Link in README Content (#39896) ### Rationale for this change ### What changes are included in this PR? Fixed Broken Link in README Content ### Are these changes tested? Yes ### Are there any user-facing changes? Yes * Closes: #39416 Lead-authored-by: Divyansh200102 Co-authored-by: Divyansh200102 <146909065+Divyansh200102@users.noreply.github.com> Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- c_glib/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/README.md b/c_glib/README.md index 2a4d6b8a6628c..24e69eff65055 100644 --- a/c_glib/README.md +++ b/c_glib/README.md @@ -101,7 +101,7 @@ $ sudo meson install -C c_glib.build You need to install Arrow C++ before you install Arrow GLib. See Arrow C++ document about how to install Arrow C++. -You need [GTK-Doc](https://www.gtk.org/gtk-doc/) and +You need [GTK-Doc](https://gitlab.gnome.org/GNOME/gtk-doc) and [GObject Introspection](https://wiki.gnome.org/Projects/GObjectIntrospection) to build Arrow GLib. You can install them by the followings: From aded7bf37686a16fc4b0649ab97231427a219d7b Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Sun, 4 Feb 2024 01:37:36 -0500 Subject: [PATCH 31/74] GH-39909: [Java][CI] Update reference to Float16 testing file reference on Testing submodule (#39911) ### Rationale for this change Update reference to Float16 testing file reference on Testing submodule. ### What changes are included in this PR? Testing submodule. changes. ### Are these changes tested? Yes ### Are there any user-facing changes? No * Closes: #39909 Authored-by: david dali susanibar arce Signed-off-by: Sutou Kouhei --- testing | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing b/testing index ad82a736c170e..25d16511e8d42 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit ad82a736c170e97b7c8c035ebd8a801c17eec170 +Subproject commit 25d16511e8d42c2744a1d94d90169e3a36e92631 From 585e0a252f327e7136695f586b187b2ba5a3a1e3 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 5 Feb 2024 05:55:54 +0800 Subject: [PATCH 32/74] MINOR: [C++][Parquet] Remove undefined GetArrowType from schema_internal.h (#39931) ### Rationale for this change We have redundant declarations below and the 1st one should be removed: ```cpp Result> GetArrowType(Type::type physical_type, const LogicalType& logical_type, int type_length); Result> GetArrowType( Type::type physical_type, const LogicalType& logical_type, int type_length, ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO); ``` ### What changes are included in this PR? Remove the redundant function declaration described above. ### Are these changes tested? Make sure build and test pass. ### Are there any user-facing changes? No. 
Authored-by: Gang Wu Signed-off-by: Sutou Kouhei --- cpp/src/parquet/arrow/schema_internal.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/src/parquet/arrow/schema_internal.h b/cpp/src/parquet/arrow/schema_internal.h index 55292ac35ab9c..f56ba0958ae2d 100644 --- a/cpp/src/parquet/arrow/schema_internal.h +++ b/cpp/src/parquet/arrow/schema_internal.h @@ -34,10 +34,6 @@ Result> FromFLBA(const LogicalType& logical_t Result> FromInt32(const LogicalType& logical_type); Result> FromInt64(const LogicalType& logical_type); -Result> GetArrowType(Type::type physical_type, - const LogicalType& logical_type, - int type_length); - Result> GetArrowType( Type::type physical_type, const LogicalType& logical_type, int type_length, ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO); From ed78986aa6971484f40a5780922128636a47d175 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 5 Feb 2024 11:51:04 +0900 Subject: [PATCH 33/74] GH-39928: [C++][Gandiva] Accept LLVM 18 (#39934) ### Rationale for this change LLVM 18.1 will be released soon. ### What changes are included in this PR? Accept LLVM 18.1. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * Closes: #39928 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/CMakeLists.txt | 1 + cpp/src/gandiva/engine.cc | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 016cd8a1b9ec8..50a85b33d5489 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -152,6 +152,7 @@ set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}") set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") set(ARROW_LLVM_VERSIONS + "18.1" "17.0" "16.0" "15.0" diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index fc047f2ac0763..bfce72cefc630 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -62,7 +62,11 @@ #endif #include #include +#if LLVM_VERSION_MAJOR >= 18 +#include +#else #include +#endif #include #include #if LLVM_VERSION_MAJOR >= 14 @@ -86,7 +90,9 @@ #include #include #include +#if LLVM_VERSION_MAJOR <= 17 #include +#endif // JITLink is available in LLVM 9+ // but the `InProcessMemoryManager::Create` API was added since LLVM 14 @@ -132,8 +138,13 @@ Result MakeTargetMachineBuilder( jtmb.setCPU(cpu_name.str()); jtmb.addFeatures(cpu_attrs); } +#if LLVM_VERSION_MAJOR >= 18 + using CodeGenOptLevel = llvm::CodeGenOptLevel; +#else + using CodeGenOptLevel = llvm::CodeGenOpt::Level; +#endif auto const opt_level = - conf.optimize() ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None; + conf.optimize() ? CodeGenOptLevel::Aggressive : CodeGenOptLevel::None; jtmb.setCodeGenOptLevel(opt_level); return jtmb; } From 5856421e31b163104570d0305cb79f323cf488a6 Mon Sep 17 00:00:00 2001 From: mwish Date: Mon, 5 Feb 2024 23:14:48 +0800 Subject: [PATCH 34/74] GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922) ### Rationale for this change See https://github.com/apache/arrow/issues/39921 ### What changes are included in this PR? Not clearing `totalCompressedBytes` when flush called ### Are these changes tested? Yes ### Are there any user-facing changes? 
Yes, it's a bugfix * Closes: #39921 Authored-by: mwish Signed-off-by: Matt Topol --- go/parquet/file/column_writer.go | 5 +++-- go/parquet/file/column_writer_test.go | 28 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index ac857d17e632d..36663b10b89dd 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -397,7 +397,6 @@ func (w *columnWriter) FlushBufferedDataPages() (err error) { } } w.pages = w.pages[:0] - w.totalCompressedBytes = 0 return } @@ -542,7 +541,9 @@ func (w *columnWriter) Close() (err error) { if !w.closed { w.closed = true if w.hasDict && !w.fallbackToNonDict { - w.WriteDictionaryPage() + if err = w.WriteDictionaryPage(); err != nil { + return err + } } if err = w.FlushBufferedDataPages(); err != nil { diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index 8011ac2487995..321e7b730d165 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -426,6 +426,26 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackEncoding(version parque } } +func (p *PrimitiveWriterTestSuite) testDictionaryFallbackAndCompressedSize(version parquet.Version) { + p.GenerateData(SmallSize) + props := parquet.DefaultColumnProperties() + props.DictionaryEnabled = true + + if version == parquet.V1_0 { + props.Encoding = parquet.Encodings.PlainDict + } else { + props.Encoding = parquet.Encodings.RLEDict + } + + writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version)) + p.WriteBatchValues(writer, nil, nil) + writer.FallbackToPlain() + p.NotEqual(0, writer.TotalCompressedBytes()) + writer.Close() + p.NotEqual(0, writer.TotalCompressedBytes()) + p.NotEqual(0, writer.TotalBytesWritten()) +} + func (p *PrimitiveWriterTestSuite) TestRequiredPlain() { p.testRequiredWithEncoding(parquet.Encodings.Plain) } @@ -575,6 +595,14 @@ func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackEncodingV2() { p.testDictionaryFallbackEncoding(parquet.V2_LATEST) } +func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackStatsV1() { + p.testDictionaryFallbackAndCompressedSize(parquet.V1_0) +} + +func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackStatsV2() { + p.testDictionaryFallbackAndCompressedSize(parquet.V2_LATEST) +} + func (p *PrimitiveWriterTestSuite) TestOptionalNullValueChunk() { // test case for NULL values p.SetupSchema(parquet.Repetitions.Optional, 1) From 85e2a684b79b560929085c7f8e27586fa6d0b1ff Mon Sep 17 00:00:00 2001 From: Elliot Morrison-Reed Date: Mon, 5 Feb 2024 10:45:46 -0500 Subject: [PATCH 35/74] GH-39925: [Go][Parquet] Fix re-slicing in maybeReplaceValidity function (#39926) ### Rationale for this change See #39925. ### What changes are included in this PR? Fixes re-slicing logic for multiple data-types and negative length bug. ### Are these changes tested? There is a new test in the PR. ### Are there any user-facing changes? No, it just fixes a bug. 
* Closes: #39925 Authored-by: Morrison-Reed Elliot (BEG/EVS1-NA) Signed-off-by: Matt Topol --- go/parquet/file/column_writer.go | 5 +++- go/parquet/file/column_writer_test.go | 38 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index 36663b10b89dd..4d603c547ca6a 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -660,7 +660,10 @@ func (w *columnWriter) maybeReplaceValidity(values arrow.Array, newNullCount int if values.Data().Offset() > 0 { data := values.Data() - buffers[1] = memory.NewBufferBytes(data.Buffers()[1].Bytes()[data.Offset()*arrow.Int32SizeBytes : data.Len()*arrow.Int32SizeBytes]) + elemSize := data.DataType().(arrow.FixedWidthDataType).Bytes() + start := data.Offset() * elemSize + end := start + data.Len()*elemSize + buffers[1] = memory.NewBufferBytes(data.Buffers()[1].Bytes()[start:end]) } data := array.NewData(values.DataType(), values.Len(), buffers, nil, int(newNullCount), 0) diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index 321e7b730d165..dd597e280b850 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -24,6 +24,8 @@ import ( "sync" "testing" + "github.com/apache/arrow/go/v16/arrow" + "github.com/apache/arrow/go/v16/arrow/array" "github.com/apache/arrow/go/v16/arrow/bitutil" "github.com/apache/arrow/go/v16/arrow/memory" arrutils "github.com/apache/arrow/go/v16/internal/utils" @@ -36,6 +38,7 @@ import ( "github.com/apache/arrow/go/v16/parquet/internal/testutils" "github.com/apache/arrow/go/v16/parquet/internal/utils" "github.com/apache/arrow/go/v16/parquet/metadata" + "github.com/apache/arrow/go/v16/parquet/pqarrow" "github.com/apache/arrow/go/v16/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -736,3 +739,38 @@ func (b *BooleanValueWriterSuite) TestAlternateBooleanValues() { b.Equal(i%2 == 0, b.ValuesOut.([]bool)[i]) } } + +func TestDictionaryReslice(t *testing.T) { + pts := []arrow.DataType{ + arrow.PrimitiveTypes.Int8, + arrow.PrimitiveTypes.Int16, + arrow.PrimitiveTypes.Int32, + arrow.PrimitiveTypes.Int64, + arrow.PrimitiveTypes.Uint8, + arrow.PrimitiveTypes.Uint16, + arrow.PrimitiveTypes.Uint32, + arrow.PrimitiveTypes.Uint64, + } + for _, pt := range pts { + t.Run(pt.String(), func(t *testing.T) { + mem := memory.NewGoAllocator() + dt := &arrow.DictionaryType{ + IndexType: pt, + ValueType: &arrow.StringType{}, + } + field := arrow.Field{Name: "test_field", Type: dt, Nullable: true} + schema := arrow.NewSchema([]arrow.Field{field}, nil) + b := array.NewRecordBuilder(mem, schema) + for i := 0; i < 2000; i++ { + b.Field(0).(*array.BinaryDictionaryBuilder).AppendString("test_value") + } + rec := b.NewRecord() + out := &bytes.Buffer{} + pqw, err := pqarrow.NewFileWriter(rec.Schema(), out, nil, pqarrow.NewArrowWriterProperties()) + assert.NoError(t, err) + err = pqw.WriteBuffered(rec) + assert.NoError(t, err) + + }) + } +} From 56951fee35c920ac898c2515896ff3bd752dde97 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 5 Feb 2024 17:15:44 +0100 Subject: [PATCH 36/74] GH-39865: [C++] Strip extension metadata when importing a registered extension (#39866) ### Rationale for this change When importing an extension type from the C Data Interface and the extension type is registered, we would still leave the extension-related metadata on the storage type. 
### What changes are included in this PR? Strip extension-related metadata on the storage type if we succeed in recreating the extension type. This matches the behavior of the IPC layer and allows for more exact roundtripping. ### Are these changes tested? Yes. ### Are there any user-facing changes? No, unless people mistakingly rely on the presence of said metadata. * Closes: #39865 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/c/bridge.cc | 6 +++ cpp/src/arrow/c/bridge_test.cc | 48 ++++++++++++++++-------- cpp/src/arrow/util/key_value_metadata.cc | 18 ++++----- cpp/src/arrow/util/key_value_metadata.h | 11 +++--- 4 files changed, 52 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 172ed8962ce77..9b165a10a61e7 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -914,6 +914,8 @@ struct DecodedMetadata { std::shared_ptr metadata; std::string extension_name; std::string extension_serialized; + int extension_name_index = -1; // index of extension_name in metadata + int extension_serialized_index = -1; // index of extension_serialized in metadata }; Result DecodeMetadata(const char* metadata) { @@ -956,8 +958,10 @@ Result DecodeMetadata(const char* metadata) { RETURN_NOT_OK(read_string(&values[i])); if (keys[i] == kExtensionTypeKeyName) { decoded.extension_name = values[i]; + decoded.extension_name_index = i; } else if (keys[i] == kExtensionMetadataKeyName) { decoded.extension_serialized = values[i]; + decoded.extension_serialized_index = i; } } decoded.metadata = key_value_metadata(std::move(keys), std::move(values)); @@ -1046,6 +1050,8 @@ struct SchemaImporter { ARROW_ASSIGN_OR_RAISE( type_, registered_ext_type->Deserialize(std::move(type_), metadata_.extension_serialized)); + RETURN_NOT_OK(metadata_.metadata->DeleteMany( + {metadata_.extension_name_index, metadata_.extension_serialized_index})); } } diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 321ec36c38d8c..8b67027454c55 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -1872,7 +1872,7 @@ class TestSchemaImport : public ::testing::Test, public SchemaStructBuilder { ASSERT_TRUE(ArrowSchemaIsReleased(&c_struct_)); Reset(); // for further tests cb.AssertCalled(); // was released - AssertTypeEqual(*expected, *type); + AssertTypeEqual(*expected, *type, /*check_metadata=*/true); } void CheckImport(const std::shared_ptr& expected) { @@ -1892,7 +1892,7 @@ class TestSchemaImport : public ::testing::Test, public SchemaStructBuilder { ASSERT_TRUE(ArrowSchemaIsReleased(&c_struct_)); Reset(); // for further tests cb.AssertCalled(); // was released - AssertSchemaEqual(*expected, *schema); + AssertSchemaEqual(*expected, *schema, /*check_metadata=*/true); } void CheckImportError() { @@ -3571,7 +3571,7 @@ class TestSchemaRoundtrip : public ::testing::Test { // Recreate the type ASSERT_OK_AND_ASSIGN(actual, ImportType(&c_schema)); type = factory_expected(); - AssertTypeEqual(*type, *actual); + AssertTypeEqual(*type, *actual, /*check_metadata=*/true); type.reset(); actual.reset(); @@ -3602,7 +3602,7 @@ class TestSchemaRoundtrip : public ::testing::Test { // Recreate the schema ASSERT_OK_AND_ASSIGN(actual, ImportSchema(&c_schema)); schema = factory(); - AssertSchemaEqual(*schema, *actual); + AssertSchemaEqual(*schema, *actual, /*check_metadata=*/true); schema.reset(); actual.reset(); @@ -3695,13 +3695,27 @@ TEST_F(TestSchemaRoundtrip, Dictionary) { } } +// Given an extension type, 
return a field of its storage type + the +// serialized extension metadata. +std::shared_ptr GetStorageWithMetadata(const std::string& field_name, + const std::shared_ptr& type) { + const auto& ext_type = checked_cast(*type); + auto storage_type = ext_type.storage_type(); + auto md = KeyValueMetadata::Make({kExtensionTypeKeyName, kExtensionMetadataKeyName}, + {ext_type.extension_name(), ext_type.Serialize()}); + return field(field_name, storage_type, /*nullable=*/true, md); +} + TEST_F(TestSchemaRoundtrip, UnregisteredExtension) { TestWithTypeFactory(uuid, []() { return fixed_size_binary(16); }); TestWithTypeFactory(dict_extension_type, []() { return dictionary(int8(), utf8()); }); - // Inside nested type - TestWithTypeFactory([]() { return list(dict_extension_type()); }, - []() { return list(dictionary(int8(), utf8())); }); + // Inside nested type. + // When an extension type is not known by the importer, it is imported + // as its storage type and the extension metadata is preserved on the field. + TestWithTypeFactory( + []() { return list(dict_extension_type()); }, + []() { return list(GetStorageWithMetadata("item", dict_extension_type())); }); } TEST_F(TestSchemaRoundtrip, RegisteredExtension) { @@ -3710,7 +3724,9 @@ TEST_F(TestSchemaRoundtrip, RegisteredExtension) { TestWithTypeFactory(dict_extension_type); TestWithTypeFactory(complex128); - // Inside nested type + // Inside nested type. + // When the extension type is registered, the extension metadata is removed + // from the storage type's field to ensure roundtripping (GH-39865). TestWithTypeFactory([]() { return list(uuid()); }); TestWithTypeFactory([]() { return list(dict_extension_type()); }); TestWithTypeFactory([]() { return list(complex128()); }); @@ -3810,7 +3826,7 @@ class TestArrayRoundtrip : public ::testing::Test { { std::shared_ptr expected; ASSERT_OK_AND_ASSIGN(expected, ToResult(factory_expected())); - AssertTypeEqual(*expected->type(), *array->type()); + AssertTypeEqual(*expected->type(), *array->type(), /*check_metadata=*/true); AssertArraysEqual(*expected, *array, true); } array.reset(); @@ -3850,7 +3866,7 @@ class TestArrayRoundtrip : public ::testing::Test { { std::shared_ptr expected; ASSERT_OK_AND_ASSIGN(expected, ToResult(factory())); - AssertSchemaEqual(*expected->schema(), *batch->schema()); + AssertSchemaEqual(*expected->schema(), *batch->schema(), /*check_metadata=*/true); AssertBatchesEqual(*expected, *batch); } batch.reset(); @@ -4230,7 +4246,7 @@ class TestDeviceArrayRoundtrip : public ::testing::Test { { std::shared_ptr expected; ASSERT_OK_AND_ASSIGN(expected, ToResult(factory_expected())); - AssertTypeEqual(*expected->type(), *array->type()); + AssertTypeEqual(*expected->type(), *array->type(), /*check_metadata=*/true); AssertArraysEqual(*expected, *array, true); } array.reset(); @@ -4276,7 +4292,7 @@ class TestDeviceArrayRoundtrip : public ::testing::Test { { std::shared_ptr expected; ASSERT_OK_AND_ASSIGN(expected, ToResult(factory())); - AssertSchemaEqual(*expected->schema(), *batch->schema()); + AssertSchemaEqual(*expected->schema(), *batch->schema(), /*check_metadata=*/true); AssertBatchesEqual(*expected, *batch); } batch.reset(); @@ -4353,7 +4369,7 @@ class TestArrayStreamExport : public BaseArrayStreamTest { SchemaExportGuard schema_guard(&c_schema); ASSERT_FALSE(ArrowSchemaIsReleased(&c_schema)); ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema)); - AssertSchemaEqual(expected, *schema); + AssertSchemaEqual(expected, *schema, /*check_metadata=*/true); } void AssertStreamEnd(struct 
ArrowArrayStream* c_stream) { @@ -4437,7 +4453,7 @@ TEST_F(TestArrayStreamExport, ArrayLifetime) { { SchemaExportGuard schema_guard(&c_schema); ASSERT_OK_AND_ASSIGN(auto got_schema, ImportSchema(&c_schema)); - AssertSchemaEqual(*schema, *got_schema); + AssertSchemaEqual(*schema, *got_schema, /*check_metadata=*/true); } ASSERT_GT(pool_->bytes_allocated(), orig_allocated_); @@ -4462,7 +4478,7 @@ TEST_F(TestArrayStreamExport, Errors) { { SchemaExportGuard schema_guard(&c_schema); ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema)); - AssertSchemaEqual(schema, arrow::schema({})); + AssertSchemaEqual(schema, arrow::schema({}), /*check_metadata=*/true); } struct ArrowArray c_array; @@ -4539,7 +4555,7 @@ TEST_F(TestArrayStreamRoundtrip, Simple) { ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, orig_schema)); Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { - AssertSchemaEqual(*orig_schema, *reader->schema()); + AssertSchemaEqual(*orig_schema, *reader->schema(), /*check_metadata=*/true); AssertReaderNext(reader, *batches[0]); AssertReaderNext(reader, *batches[1]); AssertReaderEnd(reader); diff --git a/cpp/src/arrow/util/key_value_metadata.cc b/cpp/src/arrow/util/key_value_metadata.cc index bc48ae76c2a2f..002e8b0975094 100644 --- a/cpp/src/arrow/util/key_value_metadata.cc +++ b/cpp/src/arrow/util/key_value_metadata.cc @@ -90,7 +90,7 @@ void KeyValueMetadata::Append(std::string key, std::string value) { values_.push_back(std::move(value)); } -Result KeyValueMetadata::Get(const std::string& key) const { +Result KeyValueMetadata::Get(std::string_view key) const { auto index = FindKey(key); if (index < 0) { return Status::KeyError(key); @@ -129,7 +129,7 @@ Status KeyValueMetadata::DeleteMany(std::vector indices) { return Status::OK(); } -Status KeyValueMetadata::Delete(const std::string& key) { +Status KeyValueMetadata::Delete(std::string_view key) { auto index = FindKey(key); if (index < 0) { return Status::KeyError(key); @@ -138,20 +138,18 @@ Status KeyValueMetadata::Delete(const std::string& key) { } } -Status KeyValueMetadata::Set(const std::string& key, const std::string& value) { +Status KeyValueMetadata::Set(std::string key, std::string value) { auto index = FindKey(key); if (index < 0) { - Append(key, value); + Append(std::move(key), std::move(value)); } else { - keys_[index] = key; - values_[index] = value; + keys_[index] = std::move(key); + values_[index] = std::move(value); } return Status::OK(); } -bool KeyValueMetadata::Contains(const std::string& key) const { - return FindKey(key) >= 0; -} +bool KeyValueMetadata::Contains(std::string_view key) const { return FindKey(key) >= 0; } void KeyValueMetadata::reserve(int64_t n) { DCHECK_GE(n, 0); @@ -188,7 +186,7 @@ std::vector> KeyValueMetadata::sorted_pairs( return pairs; } -int KeyValueMetadata::FindKey(const std::string& key) const { +int KeyValueMetadata::FindKey(std::string_view key) const { for (size_t i = 0; i < keys_.size(); ++i) { if (keys_[i] == key) { return static_cast(i); diff --git a/cpp/src/arrow/util/key_value_metadata.h b/cpp/src/arrow/util/key_value_metadata.h index 8702ce73a639a..57ade11e75868 100644 --- a/cpp/src/arrow/util/key_value_metadata.h +++ b/cpp/src/arrow/util/key_value_metadata.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -44,13 +45,13 @@ class ARROW_EXPORT KeyValueMetadata { void ToUnorderedMap(std::unordered_map* out) const; void Append(std::string key, std::string value); - Result Get(const std::string& key) const; - bool 
Contains(const std::string& key) const; + Result Get(std::string_view key) const; + bool Contains(std::string_view key) const; // Note that deleting may invalidate known indices - Status Delete(const std::string& key); + Status Delete(std::string_view key); Status Delete(int64_t index); Status DeleteMany(std::vector indices); - Status Set(const std::string& key, const std::string& value); + Status Set(std::string key, std::string value); void reserve(int64_t n); @@ -63,7 +64,7 @@ class ARROW_EXPORT KeyValueMetadata { std::vector> sorted_pairs() const; /// \brief Perform linear search for key, returning -1 if not found - int FindKey(const std::string& key) const; + int FindKey(std::string_view key) const; std::shared_ptr Copy() const; From cb5c109a5d6985264203e256ddae0b210251e820 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 5 Feb 2024 22:23:50 +0530 Subject: [PATCH 37/74] GH-39946: [Java] Bump com.puppycrawl.tools:checkstyle from 8.19 to 8.29 (#39694) ### Rationale for this change This PR was created in place of https://github.com/apache/arrow/pull/39202 to integrate the `puppycrawl.tools.checkstyle` upgrade. ### What changes are included in this PR? Style changes in Java classes and core changes to the style format itself. Some unsupported attributes have been removed. And some attributes have been reorganized upon the provided guidelines in the documentation. ### Are these changes tested? N/A Tested by existing checkstyle guideline. ### Are there any user-facing changes? No * Closes: #39946 Lead-authored-by: Vibhatha Lakmal Abeykoon Co-authored-by: vibhatha Signed-off-by: David Li --- .../apache/arrow/adapter/jdbc/Constants.java | 3 +- .../adapter/jdbc/MockPreparedStatement.java | 63 ++++++++++++------- .../arrow/adapter/jdbc/ResultSetUtility.java | 3 +- .../apache/arrow/adapter/orc/OrcJniUtils.java | 3 +- java/dev/checkstyle/checkstyle.xml | 18 +++--- java/dev/checkstyle/suppressions.xml | 2 +- .../org/apache/arrow/flight/FlightClient.java | 3 +- .../apache/arrow/flight/FlightGrpcUtils.java | 3 +- .../org/apache/arrow/flight/FlightStream.java | 3 +- .../arrow/flight/OutboundStreamListener.java | 3 +- .../arrow/flight/auth/AuthConstants.java | 3 +- .../arrow/flight/auth/ServerAuthWrapper.java | 4 +- .../arrow/flight/TestClientMiddleware.java | 9 ++- .../integration/tests/OrderedScenario.java | 3 +- .../jdbc/utils/IntervalStringUtils.java | 3 +- .../utils/ClientAuthenticationUtilsTest.java | 2 +- .../evaluator/ConfigurationBuilder.java | 3 +- .../gandiva/evaluator/DecimalTypeUtil.java | 3 +- .../gandiva/expression/ArrowTypeHelper.java | 3 +- .../arrow/gandiva/expression/TreeBuilder.java | 3 +- java/maven/pom.xml | 2 +- .../arrow/memory/AllocationListener.java | 15 +++-- .../apache/arrow/memory/BaseAllocator.java | 24 +++---- .../org/apache/arrow/memory/BufferLedger.java | 32 +++++----- .../apache/arrow/memory/ReferenceManager.java | 6 +- .../memory/util/ByteFunctionHelpers.java | 3 +- .../apache/arrow/memory/util/CommonUtil.java | 3 +- .../arrow/memory/util/LargeMemoryUtil.java | 3 +- .../org/apache/arrow/util/Collections2.java | 3 +- .../org/apache/arrow/util/Preconditions.java | 3 +- java/pom.xml | 2 +- .../org/apache/arrow/tools/FileToStream.java | 3 +- .../apache/arrow/vector/AllocationHelper.java | 3 +- .../apache/arrow/vector/BitVectorHelper.java | 3 +- .../arrow/vector/GenerateSampleData.java | 3 +- .../org/apache/arrow/vector/NullVector.java | 3 +- .../apache/arrow/vector/compare/Range.java | 3 +- .../arrow/vector/complex/StateTool.java | 3 +- 
.../apache/arrow/vector/ipc/ArrowMagic.java | 3 +- .../vector/ipc/message/FBSerializables.java | 3 +- .../apache/arrow/vector/util/DateUtility.java | 3 +- .../arrow/vector/util/DecimalUtility.java | 3 +- .../arrow/vector/util/DictionaryUtility.java | 3 +- .../vector/util/ObjectMapperFactory.java | 3 +- .../arrow/vector/util/SchemaUtility.java | 3 +- .../testing/ValueVectorDataPopulator.java | 3 +- 46 files changed, 174 insertions(+), 107 deletions(-) diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java index 5b01077b17996..f95133fc7e44c 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java @@ -21,7 +21,8 @@ * String constants used for metadata returned on Vectors. */ public class Constants { - private Constants() {} + private Constants() { + } public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME"; public static final String SQL_SCHEMA_NAME_KEY = "SQL_SCHEMA_NAME"; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java index 438a949b736f1..4478cdfbee6f7 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java @@ -231,7 +231,8 @@ public void setDate(int parameterIndex, Date x, Calendar cal) throws SQLExceptio } @Override - public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException {} + public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException { + } @Override public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws SQLException { @@ -241,7 +242,8 @@ public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws S } @Override - public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException {} + public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException { + } @Override public void setURL(int parameterIndex, URL x) throws SQLException { @@ -259,62 +261,80 @@ public void setRowId(int parameterIndex, RowId x) throws SQLException { } @Override - public void setNString(int parameterIndex, String value) throws SQLException {} + public void setNString(int parameterIndex, String value) throws SQLException { + } @Override public void setNCharacterStream(int parameterIndex, Reader value, long length) - throws SQLException {} + throws SQLException { + } @Override - public void setNClob(int parameterIndex, NClob value) throws SQLException {} + public void setNClob(int parameterIndex, NClob value) throws SQLException { + } @Override - public void setClob(int parameterIndex, Reader reader, long length) throws SQLException {} + public void setClob(int parameterIndex, Reader reader, long length) throws SQLException { + } @Override public void setBlob(int parameterIndex, InputStream inputStream, long length) - throws SQLException {} + throws SQLException { + } @Override - public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException {} + public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException { + } @Override - public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException {} + 
public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException { + } @Override public void setObject(int parameterIndex, Object x, int targetSqlType, int scaleOrLength) - throws SQLException {} + throws SQLException { + } @Override - public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException {} + public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException { + } @Override - public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException {} + public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException { + } @Override public void setCharacterStream(int parameterIndex, Reader reader, long length) - throws SQLException {} + throws SQLException { + } @Override - public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException {} + public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException { + } @Override - public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException {} + public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException { + } @Override - public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException {} + public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException { + } @Override - public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException {} + public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException { + } @Override - public void setClob(int parameterIndex, Reader reader) throws SQLException {} + public void setClob(int parameterIndex, Reader reader) throws SQLException { + } @Override - public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException {} + public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException { + } @Override - public void setNClob(int parameterIndex, Reader reader) throws SQLException {} + public void setNClob(int parameterIndex, Reader reader) throws SQLException { + } @Override public ResultSet executeQuery(String sql) throws SQLException { @@ -327,7 +347,8 @@ public int executeUpdate(String sql) throws SQLException { } @Override - public void close() throws SQLException {} + public void close() throws SQLException { + } @Override public int getMaxFieldSize() throws SQLException { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java index c712741b51f5b..ccc7681c5bc8b 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java @@ -348,7 +348,8 @@ public static class MockColumnMetaData { private int displaySize; - private MockColumnMetaData() {} + private MockColumnMetaData() { + } private String getLabel() { return label; diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java index 9b599234bdf51..d61799e990f77 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java @@ -32,7 +32,8 @@ class OrcJniUtils { private static final String LIBRARY_NAME = "arrow_orc_jni"; private static boolean isLoaded = false; 
- private OrcJniUtils() {} + private OrcJniUtils() { + } static void loadOrcAdapterLibraryFromJar() throws IOException, IllegalAccessException { diff --git a/java/dev/checkstyle/checkstyle.xml b/java/dev/checkstyle/checkstyle.xml index c27f382ddda76..b63a4a9cba1f3 100644 --- a/java/dev/checkstyle/checkstyle.xml +++ b/java/dev/checkstyle/checkstyle.xml @@ -60,6 +60,11 @@ + + + + + @@ -72,10 +77,6 @@ - - - - @@ -223,13 +224,12 @@ - - - - - + + + + diff --git a/java/dev/checkstyle/suppressions.xml b/java/dev/checkstyle/suppressions.xml index 585985bf32dbc..a3536e2ca9212 100644 --- a/java/dev/checkstyle/suppressions.xml +++ b/java/dev/checkstyle/suppressions.xml @@ -40,5 +40,5 @@ - + diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java index fc491ebe0df98..8f251a7c7ef07 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java @@ -437,7 +437,8 @@ public ClientStreamListener getWriter() { */ public void getResult() { // After exchange is complete, make sure stream is drained to propagate errors through reader - while (reader.next()) { }; + while (reader.next()) { + } } /** Shut down the streams in this call. */ diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java index eb5e492b4cd46..b711d7ef6b5d7 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java @@ -125,7 +125,8 @@ public void enterIdle() { } } - private FlightGrpcUtils() {} + private FlightGrpcUtils() { + } /** * Creates a Flight service. diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java index 7a5a941603ace..84beee7d40564 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java @@ -194,7 +194,8 @@ public void close() throws Exception { } } // Drain the stream without the lock (as next() implicitly needs the lock) - while (next()) { } + while (next()) { + } } catch (FlightRuntimeException e) { suppressor = e; } diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java index e80fb41c67273..80ddad90a1d28 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java @@ -119,5 +119,6 @@ default void start(VectorSchemaRoot root, DictionaryProvider dictionaries) { *

The default value can be toggled globally by setting the JVM property arrow.flight.enable_zero_copy_write * or the environment variable ARROW_FLIGHT_ENABLE_ZERO_COPY_WRITE. */ - default void setUseZeroCopy(boolean enabled) {} + default void setUseZeroCopy(boolean enabled) { + } } diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java index e3ccdc626d71b..8a37115f1f024 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java @@ -47,5 +47,6 @@ public byte[] parseBytes(byte[] serialized) { public static final Context.Key PEER_IDENTITY_KEY = Context.keyWithDefault("arrow-flight-peer-identity", ""); - private AuthConstants() {} + private AuthConstants() { + } } diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java index ad1a36a935fd7..3647e113cc0f6 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java @@ -115,7 +115,9 @@ public boolean hasNext() { @Override public void onError(Throwable t) { completed = true; - while (future == null) {/* busy wait */} + while (future == null) { + /* busy wait */ + } future.cancel(true); } diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java index bcff54bd7f66f..a1fa1f1d18509 100644 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java +++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java @@ -303,10 +303,12 @@ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { } @Override - public void onCallCompleted(CallStatus status) {} + public void onCallCompleted(CallStatus status) { + } @Override - public void onCallErrored(Throwable err) {} + public void onCallErrored(Throwable err) { + } } static class MultiHeaderClientMiddlewareFactory implements FlightClientMiddleware.Factory { @@ -356,6 +358,7 @@ public void onHeadersReceived(CallHeaders incomingHeaders) { } @Override - public void onCallCompleted(CallStatus status) {} + public void onCallCompleted(CallStatus status) { + } } } diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java index b8aa46fb5674a..13238f318eaaa 100644 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java +++ b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java @@ -55,7 +55,8 @@ public FlightProducer producer(BufferAllocator allocator, Location location) thr } @Override - public void buildServer(FlightServer.Builder builder) throws Exception {} + public void buildServer(FlightServer.Builder builder) throws Exception { + } @Override public void client(BufferAllocator allocator, Location location, FlightClient client) diff --git 
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java index fdf6c508d93b0..de6dccad4a846 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java @@ -31,7 +31,8 @@ public final class IntervalStringUtils { /** * Constructor Method of class. */ - private IntervalStringUtils( ) {} + private IntervalStringUtils( ) { + } /** * Formats a period similar to Oracle INTERVAL YEAR TO MONTH data type
. diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java index b7977462e9c01..78d252f7824c3 100644 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java @@ -84,7 +84,7 @@ public void testGetDefaultKeyStoreInstancePassword() throws IOException, keyStoreMockedStatic .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit")) - .thenReturn(keyStoreMock); + .thenReturn(keyStoreMock); KeyStore receiveKeyStore = ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit"); Assert.assertEquals(receiveKeyStore, keyStoreMock); } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java index e903b4e873278..fa5d285b90997 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java @@ -43,7 +43,8 @@ public static ConfigOptions getDefault() { return new ConfigOptions(); } - public ConfigOptions() {} + public ConfigOptions() { + } public ConfigOptions withOptimize(boolean optimize) { this.optimize = optimize; diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java index e0c072cfbe52e..703cfaa8be88b 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java @@ -23,7 +23,8 @@ * Utility methods for working with {@link Decimal} values. */ public class DecimalTypeUtil { - private DecimalTypeUtil() {} + private DecimalTypeUtil() { + } /** * Enum for supported mathematical operations. diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java index 90f8684b455a8..e7377cc5c9db4 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java @@ -33,7 +33,8 @@ * Utility methods to convert between Arrow and Gandiva types. */ public class ArrowTypeHelper { - private ArrowTypeHelper() {} + private ArrowTypeHelper() { + } static final int WIDTH_8 = 8; static final int WIDTH_16 = 16; diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java index 8656e886aae24..3d2ea27d044e7 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java @@ -29,7 +29,8 @@ * Contains helper functions for constructing expression trees. */ public class TreeBuilder { - private TreeBuilder() {} + private TreeBuilder() { + } /** * Helper functions to create literal constants. 
diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 3a88ec762e19c..7fdca7db7b8d8 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -235,7 +235,7 @@ com.puppycrawl.tools checkstyle - 8.19 + 8.29 org.slf4j diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java index ff2b25dfa30ab..b8de6d819eaf8 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java @@ -34,7 +34,8 @@ public interface AllocationListener { * * @param size the buffer size being allocated */ - default void onPreAllocation(long size) {} + default void onPreAllocation(long size) { + } /** * Called each time a new buffer has been allocated. @@ -43,7 +44,8 @@ default void onPreAllocation(long size) {} * * @param size the buffer size being allocated */ - default void onAllocation(long size) {} + default void onAllocation(long size) { + } /** * Informed each time a buffer is released from allocation. @@ -51,7 +53,8 @@ default void onAllocation(long size) {} *

An exception cannot be thrown by this method. * @param size The size of the buffer being released. */ - default void onRelease(long size) {} + default void onRelease(long size) { + } /** @@ -73,7 +76,8 @@ default boolean onFailedAllocation(long size, AllocationOutcome outcome) { * @param parentAllocator The parent allocator to which a child was added * @param childAllocator The child allocator that was just added */ - default void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) {} + default void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) { + } /** * Called immediately after a child allocator was removed from the parent allocator. @@ -81,5 +85,6 @@ default void onChildAdded(BufferAllocator parentAllocator, BufferAllocator child * @param parentAllocator The parent allocator from which a child was removed * @param childAllocator The child allocator that was just removed */ - default void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) {} + default void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) { + } } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java index 8779c7a3434ea..189c800ba0fe5 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java @@ -702,18 +702,18 @@ private void verifyAllocator( void print(StringBuilder sb, int level, Verbosity verbosity) { CommonUtil.indent(sb, level) - .append("Allocator(") - .append(name) - .append(") ") - .append(reservation) - .append('/') - .append(getAllocatedMemory()) - .append('/') - .append(getPeakMemoryAllocation()) - .append('/') - .append(getLimit()) - .append(" (res/actual/peak/limit)") - .append('\n'); + .append("Allocator(") + .append(name) + .append(") ") + .append(reservation) + .append('/') + .append(getAllocatedMemory()) + .append('/') + .append(getPeakMemoryAllocation()) + .append('/') + .append(getLimit()) + .append(" (res/actual/peak/limit)") + .append('\n'); if (DEBUG) { CommonUtil.indent(sb, level + 1).append(String.format("child allocators: %d\n", childAllocators.size())); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java index 1ca3e08ecf046..62d268a1f4493 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java @@ -478,20 +478,20 @@ public long getAccountedSize() { */ void print(StringBuilder sb, int indent, BaseAllocator.Verbosity verbosity) { CommonUtil.indent(sb, indent) - .append("ledger[") - .append(ledgerId) - .append("] allocator: ") - .append(allocator.getName()) - .append("), isOwning: ") - .append(", size: ") - .append(", references: ") - .append(bufRefCnt.get()) - .append(", life: ") - .append(lCreationTime) - .append("..") - .append(lDestructionTime) - .append(", allocatorManager: [") - .append(", life: "); + .append("ledger[") + .append(ledgerId) + .append("] allocator: ") + .append(allocator.getName()) + .append("), isOwning: ") + .append(", size: ") + .append(", references: ") + .append(bufRefCnt.get()) + .append(", life: ") + .append(lCreationTime) + .append("..") + .append(lDestructionTime) + 
.append(", allocatorManager: [") + .append(", life: "); if (!BaseAllocator.DEBUG) { sb.append("]\n"); @@ -499,8 +499,8 @@ void print(StringBuilder sb, int indent, BaseAllocator.Verbosity verbosity) { Preconditions.checkArgument(buffers != null, "IdentityHashMap of buffers must not be null"); synchronized (buffers) { sb.append("] holds ") - .append(buffers.size()) - .append(" buffers. \n"); + .append(buffers.size()) + .append(" buffers. \n"); for (ArrowBuf buf : buffers.keySet()) { buf.print(sb, indent + 2, verbosity); sb.append('\n'); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java index 7d4de18751ba9..64a4232d8aeb7 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java @@ -141,10 +141,12 @@ public boolean release(int decrement) { } @Override - public void retain() { } + public void retain() { + } @Override - public void retain(int increment) { } + public void retain(int increment) { + } @Override public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) { diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java index 9579245ca7004..79d21fa040876 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java @@ -32,7 +32,8 @@ public class ByteFunctionHelpers { private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; - private ByteFunctionHelpers() {} + private ByteFunctionHelpers() { + } /** * Helper function to check for equality of bytes in two ArrowBufs. diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java index ccca7b1e03093..707c5f1556062 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java @@ -24,7 +24,8 @@ */ public final class CommonUtil { - private CommonUtil() { } + private CommonUtil() { + } /** * Rounds up the provided value to the nearest power of two. diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java index db63bbd14ba5f..94a7873664216 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java @@ -22,7 +22,8 @@ /** Contains utilities for dealing with a 64-bit address base. 
*/ public final class LargeMemoryUtil { - private LargeMemoryUtil() {} + private LargeMemoryUtil() { + } /** * Casts length to an int, but raises an exception the value is outside diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java index 6b01a61ebca39..b88372abaaee1 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java @@ -34,7 +34,8 @@ * Utility methods for manipulating {@link java.util.Collections} and their subclasses/implementations. */ public final class Collections2 { - private Collections2() {} + private Collections2() { + } /** * Creates a {@link List} from the elements remaining in iterator. diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java index 8083033007d9c..5e4323cfc9c61 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java @@ -111,7 +111,8 @@ * @since 2.0 */ public final class Preconditions { - private Preconditions() {} + private Preconditions() { + } /** * Ensures the truth of an expression involving one or more parameters to the calling method. diff --git a/java/pom.xml b/java/pom.xml index 7871303634976..b2b300b2f3fed 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -304,7 +304,7 @@ com.puppycrawl.tools checkstyle - 8.19 + 8.29 org.slf4j diff --git a/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java b/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java index bb7cedeb74579..3d9bca58a763c 100644 --- a/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java +++ b/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java @@ -34,7 +34,8 @@ * first argument and the output is written to standard out. */ public class FileToStream { - private FileToStream() {} + private FileToStream() { + } /** * Reads an Arrow file from in and writes it back to out. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java index 6824756d8aca7..abece39475016 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java @@ -22,7 +22,8 @@ /** Helper utility methods for allocating storage for Vectors. */ public class AllocationHelper { - private AllocationHelper() {} + private AllocationHelper() { + } /** * Allocates the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java index 568554ba75ed6..10f343e260ccc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java @@ -33,7 +33,8 @@ */ public class BitVectorHelper { - private BitVectorHelper() {} + private BitVectorHelper() { + } /** * Get the index of byte corresponding to bit index in validity buffer. 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java b/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java index 6cda18a8a53d3..be501ce245410 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java @@ -27,7 +27,8 @@ * with sample data. This class should be used for that purpose. */ public class GenerateSampleData { - private GenerateSampleData() {} + private GenerateSampleData() { + } /** Populates vector with valueCount random values. */ public static void generateTestData(final ValueVector vector, final int valueCount) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java index d7b147feb152f..3b734bbf6608b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java @@ -300,7 +300,8 @@ public int getNullCount() { * @param index position of element */ @Override - public void setNull(int index) {} + public void setNull(int index) { + } @Override public boolean isNull(int index) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java index 0de99ab011f66..76db0734464ed 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java @@ -41,7 +41,8 @@ public class Range { /** * Constructs a new instance. */ - public Range() {} + public Range() { + } /** * Constructs a new instance. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java index 0098f68360a1a..2cd64c4fc6766 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java @@ -23,7 +23,8 @@ * Utility methods for state machines based on enums. */ public class StateTool { - private StateTool() {} + private StateTool() { + } static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StateTool.class); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java index 9c399669affc3..b16315caa9f51 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java @@ -25,7 +25,8 @@ * Magic header/footer helpers for {@link ArrowFileWriter} and {@link ArrowFileReader} formatted files. */ class ArrowMagic { - private ArrowMagic(){} + private ArrowMagic(){ + } private static final byte[] MAGIC = "ARROW1".getBytes(StandardCharsets.UTF_8); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java index 26736ed91c5ca..59b3bb07bcf16 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java @@ -31,7 +31,8 @@ * Utility methods for {@linkplain org.apache.arrow.vector.ipc.message.FBSerializable}s. 
*/ public class FBSerializables { - private FBSerializables() {} + private FBSerializables() { + } /** * Writes every element of all to builder and calls {@link FlatBufferBuilder#endVector()} afterwards. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java index 9e8b6d26f6fd7..f7f975a0d0e7b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java @@ -26,7 +26,8 @@ /** Utility class for Date, DateTime, TimeStamp, Interval data types. */ public class DateUtility { - private DateUtility() {} + private DateUtility() { + } private static final String UTC = "UTC"; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java index 0dfb61dcdf269..4635822e5141b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java @@ -29,7 +29,8 @@ * Utility methods for configurable precision Decimal values (e.g. {@link BigDecimal}). */ public class DecimalUtility { - private DecimalUtility() {} + private DecimalUtility() { + } public static final byte [] zeroes = new byte[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java index 9592f3975ab99..76fb585e6bd3a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java @@ -35,7 +35,8 @@ * Utility methods for working with Dictionaries used in Dictionary encodings. */ public class DictionaryUtility { - private DictionaryUtility() {} + private DictionaryUtility() { + } /** * Convert field and child fields that have a dictionary encoding to message format, so fields diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java index 39488e96efda0..5fa4c1b2260e3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java @@ -26,7 +26,8 @@ */ public final class ObjectMapperFactory { - private ObjectMapperFactory() {} + private ObjectMapperFactory() { + } /** * Creates a new {@link ObjectMapper} instance. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java index f8167604c21ad..5b3d00f6b7362 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java @@ -33,7 +33,8 @@ * Schema utility class including serialization and deserialization. */ public class SchemaUtility { - private SchemaUtility() {} + private SchemaUtility() { + } /** * Deserialize Arrow schema from byte array. 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index f9f0357861c15..9e96e75880522 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -75,7 +75,8 @@ */ public class ValueVectorDataPopulator { - private ValueVectorDataPopulator(){} + private ValueVectorDataPopulator() { + } /** * Populate values for BigIntVector. From 0c88d13341dfaba5109683bda25ee3ffcd808080 Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 6 Feb 2024 01:34:37 +0800 Subject: [PATCH 38/74] GH-39704: [C++][Parquet] Benchmark levels decoding (#39705) ### Rationale for this change This patch add the level-decoding benchmark. It test: 1. Different max-level (for flat type, maximum level would be 1, for nested type, it would grows) 2. With different repeat ( repeated null / non-null is different from non-repeated data) 3. With different read-batch size. This part of logic is a bit tricky in original code ### What changes are included in this PR? Add Level decoding benchmark ### Are these changes tested? No need ### Are there any user-facing changes? no * Closes: #39704 Authored-by: mwish Signed-off-by: Antoine Pitrou --- cpp/src/parquet/column_reader_benchmark.cc | 98 ++++++++++++++++++++++ cpp/src/parquet/column_writer_test.cc | 4 +- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/column_reader_benchmark.cc b/cpp/src/parquet/column_reader_benchmark.cc index 49b2317ede187..61fe397cf1c30 100644 --- a/cpp/src/parquet/column_reader_benchmark.cc +++ b/cpp/src/parquet/column_reader_benchmark.cc @@ -219,5 +219,103 @@ BENCHMARK(RecordReaderReadRecords) ->Args({2, 1000, true}) ->Args({2, 1000, false}); +void GenerateLevels(int level_repeats, int max_level, int num_levels, + std::vector* levels) { + // Generate random levels + std::default_random_engine gen(/*seed=*/1943); + std::uniform_int_distribution d(0, max_level); + for (int i = 0; i < num_levels;) { + int16_t current_level = d(gen); // level repeat `level_repeats` times + const int current_repeated = std::min(level_repeats, num_levels - i); + levels->insert(levels->end(), current_repeated, current_level); + i += current_repeated; + } +} + +void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels, + const int16_t* input_levels, std::vector* bytes) { + LevelEncoder encoder; + // encode levels + if (encoding == Encoding::RLE) { + int rle_size = LevelEncoder::MaxBufferSize(encoding, max_level, num_levels); + bytes->resize(rle_size + sizeof(int32_t)); + // leave space to write the rle length value + encoder.Init(encoding, max_level, num_levels, bytes->data() + sizeof(int32_t), + rle_size); + encoder.Encode(num_levels, input_levels); + int data_length = encoder.len(); + memcpy(bytes->data(), &data_length, sizeof(int32_t)); + } else { + int bitpack_size = + LevelEncoder::MaxBufferSize(encoding, max_level, num_levels) + sizeof(int32_t); + bytes->resize(bitpack_size); + encoder.Init(encoding, max_level, num_levels, bytes->data(), + static_cast(bytes->size())); + encoder.Encode(num_levels, input_levels); + } +} + +static void DecodeLevels(Encoding::type level_encoding, int16_t max_level, int num_levels, + int batch_size, int level_repeat_count, + ::benchmark::State& state) { + std::vector bytes; + { + std::vector input_levels; + 
GenerateLevels(/*level_repeats=*/level_repeat_count, /*max_repeat_factor=*/max_level, + num_levels, &input_levels); + EncodeLevels(level_encoding, max_level, num_levels, input_levels.data(), &bytes); + } + + LevelDecoder decoder; + std::vector output_levels(batch_size); + for (auto _ : state) { + state.PauseTiming(); + decoder.SetData(level_encoding, max_level, num_levels, bytes.data(), + static_cast(bytes.size())); + state.ResumeTiming(); + // Decode multiple times with batch_size + while (true) { + int levels_decoded = decoder.Decode(batch_size, output_levels.data()); + if (levels_decoded == 0) { + break; + } + } + } + state.SetBytesProcessed(state.iterations() * num_levels * sizeof(int16_t)); + state.SetItemsProcessed(state.iterations() * num_levels); +} + +static void ReadLevels_Rle(::benchmark::State& state) { + int16_t max_level = static_cast(state.range(0)); + int num_levels = static_cast(state.range(1)); + int batch_size = static_cast(state.range(2)); + int level_repeat_count = static_cast(state.range(3)); + DecodeLevels(Encoding::RLE, max_level, num_levels, batch_size, level_repeat_count, + state); +} + +static void ReadLevels_BitPack(::benchmark::State& state) { + int16_t max_level = static_cast(state.range(0)); + int num_levels = static_cast(state.range(1)); + int batch_size = static_cast(state.range(2)); + int level_repeat_count = static_cast(state.range(3)); + DecodeLevels(Encoding::BIT_PACKED, max_level, num_levels, batch_size, + level_repeat_count, state); +} + +static void ReadLevelsArguments(::benchmark::internal::Benchmark* b) { + b->ArgNames({"MaxLevel", "NumLevels", "BatchSize", "LevelRepeatCount"}) + ->Args({1, 8096, 1024, 1}) + ->Args({1, 8096, 1024, 7}) + ->Args({1, 8096, 1024, 1024}) + ->Args({1, 8096, 2048, 1}) + ->Args({3, 8096, 1024, 1}) + ->Args({3, 8096, 2048, 1}) + ->Args({3, 8096, 1024, 7}); +} + +BENCHMARK(ReadLevels_Rle)->Apply(ReadLevelsArguments); +BENCHMARK(ReadLevels_BitPack)->Apply(ReadLevelsArguments); + } // namespace benchmark } // namespace parquet diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 97421629d2ca6..a40e71ce30aec 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1021,7 +1021,7 @@ void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels, } void VerifyDecodingLevels(Encoding::type encoding, int16_t max_level, - std::vector& input_levels, + const std::vector& input_levels, std::vector& bytes) { LevelDecoder decoder; int levels_count = 0; @@ -1060,7 +1060,7 @@ void VerifyDecodingLevels(Encoding::type encoding, int16_t max_level, } void VerifyDecodingMultipleSetData(Encoding::type encoding, int16_t max_level, - std::vector& input_levels, + const std::vector& input_levels, std::vector>& bytes) { LevelDecoder decoder; int levels_count = 0; From de53aac762fc703148f5822ed170b462a6b467d8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:01:02 -0800 Subject: [PATCH 39/74] MINOR: [C#] Bump Grpc.Tools from 2.60.0 to 2.61.0 in /csharp (#39945) Bumps [Grpc.Tools](https://github.com/grpc/grpc) from 2.60.0 to 2.61.0.
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index 68c3e47e01902..3a6ae28b390d2 100644 --- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -7,7 +7,7 @@ - + From 26801f147a9e98bb6c5bc4e7131bdf1bc2794467 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Mon, 5 Feb 2024 15:29:06 -0500 Subject: [PATCH 40/74] GH-39769: [C++][Device] Fix Importing nested and string types for DeviceArray (#39770) ### Rationale for this change In my testing with libcudf and other GPU data, I discovered a deficiency in ImportDeviceArray and thus ImportDeviceRecordBatch where the device type and memory manager aren't propagated to child importers and it fails to import offset-based types such as strings. ### What changes are included in this PR? These are relatively easily handled by first ensuring that `ImportChild` propagates the device_type and memory manager from the parent. Then for importing offset based values we merely need to use the memory manager to copy the final offset value to the CPU to use for the buffer size computation. This will work for any device which has implemented CopyBufferTo/From ### Are these changes tested? A new test is added to test these situations. * Closes: #39769 Authored-by: Matt Topol Signed-off-by: Matt Topol --- cpp/src/arrow/c/bridge.cc | 23 ++++++++++++++++++++--- cpp/src/arrow/c/bridge_test.cc | 10 ++++++++++ cpp/src/arrow/device.cc | 14 ++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 9b165a10a61e7..119249da99a6d 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1543,6 +1543,8 @@ struct ArrayImporter { if (recursion_level_ >= kMaxImportRecursionLevel) { return Status::Invalid("Recursion level in ArrowArray struct exceeded"); } + device_type_ = parent->device_type_; + memory_mgr_ = parent->memory_mgr_; // Child buffers will keep the entire parent import alive. // Perhaps we can move the child structs to an owned area // when the parent ImportedArrayData::Release() gets called, @@ -1857,10 +1859,25 @@ struct ArrayImporter { template Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id, int64_t byte_width = 1) { - auto offsets = data_->GetValues(offsets_buffer_id); + if (device_type_ == DeviceAllocationType::kCPU) { + auto offsets = data_->GetValues(offsets_buffer_id); + // Compute visible size of buffer + int64_t buffer_size = + (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0; + return ImportBuffer(buffer_id, buffer_size); + } + + // we only need the value of the last offset so let's just copy that + // one value from device to host. + auto single_value_buf = + SliceBuffer(data_->buffers[offsets_buffer_id], + c_struct_->length * sizeof(OffsetType), sizeof(OffsetType)); + ARROW_ASSIGN_OR_RAISE( + auto cpubuf, Buffer::ViewOrCopy(single_value_buf, default_cpu_memory_manager())); + auto offsets = cpubuf->data_as(); // Compute visible size of buffer - int64_t buffer_size = - (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0; + int64_t buffer_size = (c_struct_->length > 0) ? 
byte_width * offsets[0] : 0; + return ImportBuffer(buffer_id, buffer_size); } diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 8b67027454c55..b8d5e0fcd3845 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -4320,6 +4320,16 @@ TEST_F(TestDeviceArrayRoundtrip, Primitive) { TestWithJSON(mm, int32(), "[4, 5, null]"); } +TEST_F(TestDeviceArrayRoundtrip, Struct) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + auto type = struct_({field("ints", int16()), field("strs", utf8())}); + + TestWithJSON(mm, type, "[]"); + TestWithJSON(mm, type, R"([[4, "foo"], [5, "bar"]])"); + TestWithJSON(mm, type, R"([[4, null], null, [5, "foo"]])"); +} + //////////////////////////////////////////////////////////////////////////// // Array stream export tests diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 616f89aae896f..3736a4e018c33 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -195,6 +195,13 @@ Result> CPUMemoryManager::ViewBufferFrom( if (!from->is_cpu()) { return nullptr; } + // in this case the memory manager we're coming from is visible on the CPU, + // but uses an allocation type other than CPU. Since we know the data is visible + // to the CPU a "View" of this should use the CPUMemoryManager as the listed memory + // manager. + if (buf->device_type() != DeviceAllocationType::kCPU) { + return std::make_shared(buf->address(), buf->size(), shared_from_this(), buf); + } return buf; } @@ -220,6 +227,13 @@ Result> CPUMemoryManager::ViewBufferTo( if (!to->is_cpu()) { return nullptr; } + // in this case the memory manager we're coming from is visible on the CPU, + // but uses an allocation type other than CPU. Since we know the data is visible + // to the CPU a "View" of this should use the CPUMemoryManager as the listed memory + // manager. + if (buf->device_type() != DeviceAllocationType::kCPU) { + return std::make_shared(buf->address(), buf->size(), to, buf); + } return buf; } From fd69d307447888101600376fa3016b727a3e0106 Mon Sep 17 00:00:00 2001 From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com> Date: Tue, 6 Feb 2024 06:11:36 +0800 Subject: [PATCH 41/74] GH-39860: [C++] Expression ExecuteScalarExpression execute empty args function with a wrong result (#39908) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Try to fix #39860. ### What changes are included in this PR? Deal with the call->arguments.size() == 0's condition in ExecuteScalarExpression when we call some functions has no arguments, like (random, hash_count ...). ### Are these changes tested? Yes ### Are there any user-facing changes? No. 
* Closes: #39860 Lead-authored-by: hugo.zhang Co-authored-by: 张回归 Signed-off-by: Benjamin Kietzman --- cpp/src/arrow/compute/expression.cc | 13 +++++++++++-- cpp/src/arrow/compute/expression_test.cc | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/expression.cc b/cpp/src/arrow/compute/expression.cc index b47e0a35525c5..8c59ad1df86f2 100644 --- a/cpp/src/arrow/compute/expression.cc +++ b/cpp/src/arrow/compute/expression.cc @@ -761,6 +761,15 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i } } + int64_t input_length; + if (!arguments.empty() && all_scalar) { + // all inputs are scalar, so use a 1-long batch to avoid + // computing input.length equivalent outputs + input_length = 1; + } else { + input_length = input.length; + } + auto executor = compute::detail::KernelExecutor::MakeScalar(); compute::KernelContext kernel_context(exec_context, call->kernel); @@ -772,8 +781,8 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i RETURN_NOT_OK(executor->Init(&kernel_context, {kernel, types, options})); compute::detail::DatumAccumulator listener; - RETURN_NOT_OK(executor->Execute( - ExecBatch(std::move(arguments), all_scalar ? 1 : input.length), &listener)); + RETURN_NOT_OK( + executor->Execute(ExecBatch(std::move(arguments), input_length), &listener)); const auto out = executor->WrapResults(arguments, listener.values()); #ifndef NDEBUG DCHECK_OK(executor->CheckResultType(out, call->function_name.c_str())); diff --git a/cpp/src/arrow/compute/expression_test.cc b/cpp/src/arrow/compute/expression_test.cc index 44159e76600fb..d33c348cd77da 100644 --- a/cpp/src/arrow/compute/expression_test.cc +++ b/cpp/src/arrow/compute/expression_test.cc @@ -863,6 +863,25 @@ TEST(Expression, ExecuteCall) { ])")); } +TEST(Expression, ExecuteCallWithNoArguments) { + const int kCount = 10; + auto random_options = RandomOptions::FromSeed(/*seed=*/0); + ExecBatch input({}, kCount); + + Expression random_expr = call("random", {}, random_options); + ASSERT_OK_AND_ASSIGN(random_expr, random_expr.Bind(float64())); + + ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(random_expr, input)); + compute::ExecContext* exec_context = default_exec_context(); + ASSERT_OK_AND_ASSIGN(auto function, + exec_context->func_registry()->GetFunction("random")); + ASSERT_OK_AND_ASSIGN(Datum expected, + function->Execute(input, &random_options, exec_context)); + AssertDatumsEqual(actual, expected, /*verbose=*/true); + + EXPECT_EQ(actual.length(), kCount); +} + TEST(Expression, ExecuteDictionaryTransparent) { ExpectExecute( equal(field_ref("a"), field_ref("b")), From 0415a60eebdaf8130ca3028a802529ecfb738493 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 6 Feb 2024 08:44:29 +0900 Subject: [PATCH 42/74] GH-39883: [CI][R][Windows] Use ci/scripts/install_minio.sh with Git bash (#39929) ### Rationale for this change `curl` in Rtools can't be used on non Rtools' MSYS2 environment. Because `curl` in Rtools can't refer `/usr/ssl/certs/ca-bundle.crt` on non Rtools' MSYS2 environment. ### What changes are included in this PR? Use the `bash` in GitHub Actions Runner. `curl` in the environment works. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* Closes: #39883 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/r.yml | 41 ++++++++++++----------- ci/scripts/install_minio.sh | 67 +++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 41 deletions(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 2a801b6040ec8..3d1f75ede4bb5 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -21,24 +21,26 @@ on: push: paths: - ".github/workflows/r.yml" - - "ci/scripts/r_*.sh" - - "ci/scripts/cpp_*.sh" - - "ci/scripts/PKGBUILD" - - "ci/etc/rprofile" - "ci/docker/**" + - "ci/etc/rprofile" + - "ci/scripts/PKGBUILD" + - "ci/scripts/cpp_*.sh" + - "ci/scripts/install_minio.sh" + - "ci/scripts/r_*.sh" - "cpp/**" - - 'docker-compose.yml' + - "docker-compose.yml" - "r/**" pull_request: paths: - ".github/workflows/r.yml" - - "ci/scripts/r_*.sh" - - "ci/scripts/cpp_*.sh" - - "ci/scripts/PKGBUILD" - - "ci/etc/rprofile" - "ci/docker/**" + - "ci/etc/rprofile" + - "ci/scripts/PKGBUILD" + - "ci/scripts/cpp_*.sh" + - "ci/scripts/install_minio.sh" + - "ci/scripts/r_*.sh" - "cpp/**" - - 'docker-compose.yml' + - "docker-compose.yml" - "r/**" concurrency: @@ -256,6 +258,16 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + # This must be done before r-lib/actions/setup-r because curl in + # Rtools doesn't work on non Rtools' MSYS2 environment. If we + # use "shell: bash" after r-lib/actions/setup-r, bash in Rtools + # is used on non Rtools' MSYS2 environment. + - name: Install MinIO + shell: bash + run: | + mkdir -p "$HOME/.local/bin" + ci/scripts/install_minio.sh latest "$HOME/.local" + echo "$HOME/.local/bin" >> $GITHUB_PATH - run: mkdir r/windows - name: Download artifacts uses: actions/download-artifact@v3 @@ -282,15 +294,6 @@ jobs: working-directory: 'r' extra-packages: | any::rcmdcheck - - name: Install MinIO - shell: bash - run: | - mkdir -p "$HOME/.local/bin" - curl \ - --output "$HOME/.local/bin/minio.exe" \ - https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z - chmod +x "$HOME/.local/bin/minio.exe" - echo "$HOME/.local/bin" >> $GITHUB_PATH # TODO(ARROW-17149): figure out why the GCS tests are hanging on Windows # - name: Install Google Cloud Storage Testbench # shell: bash diff --git a/ci/scripts/install_minio.sh b/ci/scripts/install_minio.sh index 6ea8e1a095c39..e493a183b4543 100755 --- a/ci/scripts/install_minio.sh +++ b/ci/scripts/install_minio.sh @@ -17,7 +17,15 @@ # specific language governing permissions and limitations # under the License. 
-set -e +set -eu + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + exit 1 +fi + +version=$1 +prefix=$2 declare -A archs archs=([x86_64]=amd64 @@ -25,45 +33,60 @@ archs=([x86_64]=amd64 [aarch64]=arm64 [s390x]=s390x) -declare -A platforms -platforms=([Linux]=linux - [Darwin]=darwin) - arch=$(uname -m) -platform=$(uname) -version=$1 -prefix=$2 - -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - exit 1 -elif [ -z ${archs[$arch]} ]; then +if [ -z ${archs[$arch]} ]; then echo "Unsupported architecture: ${arch}" exit 0 -elif [ -z ${platforms[$platform]} ]; then - echo "Unsupported platform: ${platform}" - exit 0 -elif [ "${version}" != "latest" ]; then +fi +arch=${archs[$arch]} + +platform=$(uname) +case ${platform} in + Linux) + platform=linux + ;; + Darwin) + platform=darwin + ;; + MSYS_NT*|MINGW64_NT*) + platform=windows + ;; + *) + echo "Unsupported platform: ${platform}" + exit 0 + ;; +esac + +if [ "${version}" != "latest" ]; then echo "Cannot fetch specific versions of minio, only latest is supported." exit 1 fi -arch=${archs[$arch]} -platform=${platforms[$platform]} - # Use specific versions for minio server and client to avoid CI failures on new releases. minio_version="minio.RELEASE.2022-05-26T05-48-41Z" mc_version="mc.RELEASE.2022-05-09T04-08-26Z" +download() +{ + local output=$1 + local url=$2 + + if type wget > /dev/null 2>&1; then + wget -nv --output-document ${output} ${url} + else + curl --fail --location --output ${output} ${url} + fi +} + if [[ ! -x ${prefix}/bin/minio ]]; then url="https://dl.min.io/server/minio/release/${platform}-${arch}/archive/${minio_version}" echo "Fetching ${url}..." - wget -nv --output-document ${prefix}/bin/minio ${url} + download ${prefix}/bin/minio ${url} chmod +x ${prefix}/bin/minio fi if [[ ! -x ${prefix}/bin/mc ]]; then url="https://dl.min.io/client/mc/release/${platform}-${arch}/archive/${mc_version}" echo "Fetching ${url}..." - wget -nv --output-document ${prefix}/bin/mc ${url} + download ${prefix}/bin/mc ${url} chmod +x ${prefix}/bin/mc fi From 9db823b45fd4ae455c531e944681c898bede7d53 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 6 Feb 2024 08:50:58 +0900 Subject: [PATCH 43/74] GH-39930: [C++] Use Requires instead of Libs for system RE2 in arrow.pc (#39932) ### Rationale for this change We chose Libs{,.private} with libre2.a for system RE2 in GH-10626. Because "Require{,.private} re2" may add "-std=c++11". If "-std=c++11" was added, users can't build Apache Arrow C++ because Apache Arrow C++ requires C++17 or later. But this approach doesn't work with RE2 2024-06-01 or later because it at least requires Abseil. If we keep the Libs{,.private} approach, we also need to add Abseil libraries to Libs{,.private}. But it's unmaintainable. ### What changes are included in this PR? Let's use "Requires{,.private} re2" instead of Libs{,.private}. I hope recent re2.pc doesn't add "-std=c++11". ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. 
* Closes: #39930 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 6bb9c0f6af2ca..0238c26c0fb51 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2594,16 +2594,11 @@ macro(build_re2) endmacro() if(ARROW_WITH_RE2) - # Don't specify "PC_PACKAGE_NAMES re2" here because re2.pc may - # include -std=c++11. It's not compatible with C source and C++ - # source not uses C++ 11. - resolve_dependency(re2 HAVE_ALT TRUE) - if(${re2_SOURCE} STREQUAL "SYSTEM" AND ARROW_BUILD_STATIC) - get_target_property(RE2_TYPE re2::re2 TYPE) - if(NOT RE2_TYPE STREQUAL "INTERFACE_LIBRARY") - string(APPEND ARROW_PC_LIBS_PRIVATE " $") - endif() - endif() + resolve_dependency(re2 + HAVE_ALT + TRUE + PC_PACKAGE_NAMES + re2) add_definitions(-DARROW_WITH_RE2) endif() From 0896d5b86510b9d410fd849610e2e1dedc77bf03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 6 Feb 2024 01:41:27 +0100 Subject: [PATCH 44/74] GH-39943: [CI][Python] Update manylinux images to avoid GPG problems downloading packages (#39944) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Old manylinux images seem to have issues with a GPG key in order to download packages. ### What changes are included in this PR? Update the manylinux image used for the latest one. ### Are these changes tested? Via archery jobs ### Are there any user-facing changes? No * Closes: #39943 Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0252c4ec8a896..5c84d24fd7df7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1030,7 +1030,7 @@ services: args: arch: ${ARCH} arch_short: ${ARCH_SHORT} - base: quay.io/pypa/manylinux2014_${ARCH_ALIAS}:2023-10-03-72cdc42 + base: quay.io/pypa/manylinux2014_${ARCH_ALIAS}:2024-02-04-ea37246 vcpkg: ${VCPKG} python: ${PYTHON} manylinux: 2014 @@ -1053,7 +1053,7 @@ services: args: arch: ${ARCH} arch_short: ${ARCH_SHORT} - base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2023-10-03-72cdc42 + base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2024-02-04-ea37246 vcpkg: ${VCPKG} python: ${PYTHON} manylinux: 2_28 From 15525102992fbe83e6ce0943fe09e3f23a1287f4 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Tue, 6 Feb 2024 00:48:57 +0000 Subject: [PATCH 45/74] GH-39621: [CI][Packaging] Update vcpkg to 2023.11.20 release (#39622) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Old version of vcpkg is blocking https://github.com/apache/arrow/issues/39352 ### What changes are included in this PR? - Upgrade vcpkg - Fix ports patches - Upgrade visual studio used in windows wheel builds. VS2019 is now required for the vcpkg `abseil` build. - Move `VCPKG_FORCE_SYSTEM_BINARIES` to be set before vcpkg install to fix vcpkg install on linux ARM. - Fix for LLVM 17 which requires that an executable exports "llvm_orc_registerEHFrameSectionWrapper()" and "llvm_orc_unregisterEHFrameSectionWrapper()". This effects the java builds that depend on llvm from vcpkg for gandiva. - Update image used for python wheel builds on windows to 2024-02-05 ### Are these changes tested? 
Does not change any behaviour so should be covered by existing tests ### Are there any user-facing changes? There shouldn't be * Closes: #39621 Lead-authored-by: Thomas Newton Co-authored-by: Sutou Kouhei Co-authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- .env | 6 ++--- ci/docker/python-wheel-manylinux.dockerfile | 7 ++--- ...thon-wheel-windows-test-vs2019.dockerfile} | 4 +-- ...=> python-wheel-windows-vs2019.dockerfile} | 4 +-- ci/scripts/python_wheel_windows_build.bat | 8 +++--- ci/vcpkg/ports.patch | 27 ++++++++++--------- ci/vcpkg/vcpkg.json | 5 +++- cpp/src/gandiva/CMakeLists.txt | 9 +++++++ dev/tasks/python-wheels/github.windows.yml | 12 ++++----- docker-compose.yml | 16 +++++------ 10 files changed, 57 insertions(+), 41 deletions(-) rename ci/docker/{python-wheel-windows-test-vs2017.dockerfile => python-wheel-windows-test-vs2019.dockerfile} (96%) rename ci/docker/{python-wheel-windows-vs2017.dockerfile => python-wheel-windows-vs2019.dockerfile} (98%) diff --git a/.env b/.env index 427a4ab0bf398..eb87dc62bdd8c 100644 --- a/.env +++ b/.env @@ -92,13 +92,13 @@ DEVTOOLSET_VERSION= # Used through docker-compose.yml and serves as the default version for the # ci/scripts/install_vcpkg.sh script. Prefer to use short SHAs to keep the # docker tags more readable. -VCPKG="501db0f17ef6df184fcdbfbe0f87cde2313b6ab1" # 2023.04.15 Release +VCPKG="a42af01b72c28a8e1d7b48107b33e4f286a55ef6" # 2023.11.20 Release # This must be updated when we update -# ci/docker/python-wheel-windows-vs2017.dockerfile. +# ci/docker/python-wheel-windows-vs2019.dockerfile. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2023-08-02 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-02-05 # Use conanio/${CONAN} for "docker-compose run --rm conan". See # https://github.com/conan-io/conan-docker-tools#readme for available diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index a07c727ac76fa..2831440d5a967 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -62,15 +62,16 @@ COPY ci/vcpkg/*.patch \ COPY ci/scripts/install_vcpkg.sh \ arrow/ci/scripts/ ENV VCPKG_ROOT=/opt/vcpkg -RUN arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg} -ENV PATH="${PATH}:${VCPKG_ROOT}" - ARG build_type=release ENV CMAKE_BUILD_TYPE=${build_type} \ VCPKG_FORCE_SYSTEM_BINARIES=1 \ VCPKG_OVERLAY_TRIPLETS=/arrow/ci/vcpkg \ VCPKG_DEFAULT_TRIPLET=${arch_short}-linux-static-${build_type} \ VCPKG_FEATURE_FLAGS="manifests" + +RUN arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg} +ENV PATH="${PATH}:${VCPKG_ROOT}" + COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/ # cannot use the S3 feature here because while aws-sdk-cpp=1.9.160 contains # ssl related fixes as well as we can patch the vcpkg portfile to support diff --git a/ci/docker/python-wheel-windows-test-vs2017.dockerfile b/ci/docker/python-wheel-windows-test-vs2019.dockerfile similarity index 96% rename from ci/docker/python-wheel-windows-test-vs2017.dockerfile rename to ci/docker/python-wheel-windows-test-vs2019.dockerfile index e842ede18454b..67d99fa9c5724 100644 --- a/ci/docker/python-wheel-windows-test-vs2017.dockerfile +++ b/ci/docker/python-wheel-windows-test-vs2019.dockerfile @@ -19,8 +19,8 @@ # when you update this file. 
# based on mcr.microsoft.com/windows/servercore:ltsc2019 -# contains choco and vs2017 preinstalled -FROM abrarov/msvc-2017:2.11.0 +# contains choco and vs2019 preinstalled +FROM abrarov/msvc-2019:2.11.0 # Add unix tools to path RUN setx path "%path%;C:\Program Files\Git\usr\bin" diff --git a/ci/docker/python-wheel-windows-vs2017.dockerfile b/ci/docker/python-wheel-windows-vs2019.dockerfile similarity index 98% rename from ci/docker/python-wheel-windows-vs2017.dockerfile rename to ci/docker/python-wheel-windows-vs2019.dockerfile index 067105b3a7995..b8e8aad952b1c 100644 --- a/ci/docker/python-wheel-windows-vs2017.dockerfile +++ b/ci/docker/python-wheel-windows-vs2019.dockerfile @@ -19,8 +19,8 @@ # when you update this file. # based on mcr.microsoft.com/windows/servercore:ltsc2019 -# contains choco and vs2017 preinstalled -FROM abrarov/msvc-2017:2.11.0 +# contains choco and vs2019 preinstalled +FROM abrarov/msvc-2019:2.11.0 # Install CMake and Ninja ARG cmake=3.21.4 diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index ffb43b3481e55..73b0192d9bc97 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -19,7 +19,7 @@ echo "Building windows wheel..." -call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat" +call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ===" del /s /q C:\arrow-build @@ -50,7 +50,8 @@ set ARROW_WITH_SNAPPY=ON set ARROW_WITH_ZLIB=ON set ARROW_WITH_ZSTD=ON set CMAKE_UNITY_BUILD=ON -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 +set CMAKE_GENERATOR=Visual Studio 16 2019 +set CMAKE_PLATFORM=x64 set VCPKG_ROOT=C:\vcpkg set VCPKG_FEATURE_FLAGS=-manifests set VCGPK_TARGET_TRIPLET=amd64-windows-static-md-%CMAKE_BUILD_TYPE% @@ -96,6 +97,7 @@ cmake ^ -DVCPKG_MANIFEST_MODE=OFF ^ -DVCPKG_TARGET_TRIPLET=%VCGPK_TARGET_TRIPLET% ^ -G "%CMAKE_GENERATOR%" ^ + -A "%CMAKE_PLATFORM%" ^ C:\arrow\cpp || exit /B 1 cmake --build . 
--config %CMAKE_BUILD_TYPE% --target install || exit /B 1 popd @@ -121,6 +123,6 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM bundle the msvc runtime -cp "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Redist\MSVC\14.16.27012\x64\Microsoft.VC141.CRT\msvcp140.dll" pyarrow\ +cp "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Redist\MSVC\14.28.29325\x64\Microsoft.VC142.CRT\msvcp140.dll" pyarrow\ python setup.py bdist_wheel || exit /B 1 popd diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch index 68f6cae5addc9..0d4fb540a2003 100644 --- a/ci/vcpkg/ports.patch +++ b/ci/vcpkg/ports.patch @@ -1,13 +1,14 @@ diff --git a/ports/curl/portfile.cmake b/ports/curl/portfile.cmake -index 5a14562..924b1b7 100644 +index bdc544e9e..53f6bbc3b 100644 --- a/ports/curl/portfile.cmake +++ b/ports/curl/portfile.cmake -@@ -87,8 +87,11 @@ vcpkg_cmake_configure( +@@ -74,9 +74,12 @@ vcpkg_cmake_configure( -DENABLE_MANUAL=OFF -DCURL_CA_FALLBACK=ON -DCURL_USE_LIBPSL=OFF + -DCURL_CA_PATH=none + -DCURL_CA_BUNDLE=none + -DCMAKE_DISABLE_FIND_PACKAGE_Perl=ON OPTIONS_DEBUG -DENABLE_DEBUG=ON + ${EXTRA_ARGS_DEBUG} @@ -15,29 +16,29 @@ index 5a14562..924b1b7 100644 vcpkg_cmake_install() vcpkg_copy_pdbs() diff --git a/ports/snappy/portfile.cmake b/ports/snappy/portfile.cmake -index 8f3f3f9..745b0fb 100644 +index 0c7098082..c603c3653 100644 --- a/ports/snappy/portfile.cmake +++ b/ports/snappy/portfile.cmake -@@ -9,6 +9,7 @@ vcpkg_from_github( - HEAD_REF master +@@ -10,6 +10,7 @@ vcpkg_from_github( PATCHES fix_clang-cl_build.patch + no-werror.patch + "snappy-disable-bmi.patch" ) vcpkg_cmake_configure( diff --git a/ports/snappy/snappy-disable-bmi.patch b/ports/snappy/snappy-disable-bmi.patch new file mode 100644 -index 0000000..a57ce0c +index 000000000..e839c93a4 --- /dev/null +++ b/ports/snappy/snappy-disable-bmi.patch @@ -0,0 +1,19 @@ +diff --git a/snappy.cc b/snappy.cc -+index 79dc0e8..f3153ea 100644 ++index d414718..7b49d2a 100644 +--- a/snappy.cc ++++ b/snappy.cc -+@@ -965,14 +965,10 @@ static inline void Report(const char *algorithm, size_t compressed_size, -+ static inline uint32_t ExtractLowBytes(uint32_t v, int n) { ++@@ -1014,14 +1014,10 @@ static inline void Report(const char *algorithm, size_t compressed_size, ++ static inline uint32_t ExtractLowBytes(const uint32_t& v, int n) { + assert(n >= 0); + assert(n <= 4); +-#if SNAPPY_HAVE_BMI2 @@ -52,13 +53,13 @@ index 0000000..a57ce0c + + static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) { diff --git a/ports/llvm/portfile.cmake b/ports/llvm/portfile.cmake -index 4d7e26a..1f054a2 100644 +index bf9397b66..c3112b673 100644 --- a/ports/llvm/portfile.cmake +++ b/ports/llvm/portfile.cmake -@@ -274,6 +274,8 @@ vcpkg_cmake_configure( +@@ -293,6 +293,8 @@ vcpkg_cmake_configure( + ${FEATURE_OPTIONS} + MAYBE_UNUSED_VARIABLES COMPILER_RT_ENABLE_IOS - OPENMP_TOOLS_INSTALL_DIR - MLIR_TOOLS_INSTALL_DIR + BOLT_TOOLS_INSTALL_DIR + LIBOMP_INSTALL_ALIASES ) diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 71c23165e61f0..99771728ecf18 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -81,8 +81,11 @@ "default-features": false, "features": [ "clang", - "default-options", "default-targets", + "enable-bindings", + "enable-terminfo", + "enable-zlib", + "enable-zstd", "enable-rtti", "lld", "tools" diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 3f038f54a7b27..d773fb5ff5895 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -229,6 
+229,15 @@ function(ADD_GANDIVA_TEST REL_TEST_NAME) set(TEST_NAME gandiva-${REL_TEST_NAME}) string(REPLACE "_" "-" TEST_NAME ${TEST_NAME}) + + if(ARG_USE_STATIC_LINKING OR ARROW_TEST_LINKAGE STREQUAL "static") + # LLVM 17 or later requires that an executable exports + # "llvm_orc_registerEHFrameSectionWrapper()" and + # "llvm_orc_unregisterEHFrameSectionWrapper()". We need to do + # nothing when we use libLLVM.so. But we need to export symbols + # explicitly when we use libLLVM*.a. + set_target_properties(${TEST_NAME} PROPERTIES ENABLE_EXPORTS TRUE) + endif() endfunction() add_gandiva_test(internals-test diff --git a/dev/tasks/python-wheels/github.windows.yml b/dev/tasks/python-wheels/github.windows.yml index 1641796a719e2..01f4977a9b0b1 100644 --- a/dev/tasks/python-wheels/github.windows.yml +++ b/dev/tasks/python-wheels/github.windows.yml @@ -29,7 +29,7 @@ jobs: # this is a private repository at the moment (mostly because of licensing # consideration of windows images with visual studio), but anyone can # recreate the image by manually building it via: - # `archery build python-wheel-windows-vs2017` + # `archery build python-wheel-windows-vs2019` # note that we don't run docker build since there wouldn't be a cache hit # and rebuilding the dependencies takes a fair amount of time REPO: ghcr.io/ursacomputing/arrow @@ -46,17 +46,17 @@ jobs: run: | cd arrow @rem We want to use only - @rem archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2017 + @rem archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2019 @rem but it doesn't use pulled caches. @rem It always build an image from scratch. @rem We can remove this workaround once we find a way to use @rem pulled caches when build an image. echo on - archery docker pull --no-ignore-pull-failures python-wheel-windows-vs2017 + archery docker pull --no-ignore-pull-failures python-wheel-windows-vs2019 if errorlevel 1 ( - archery docker build --no-pull python-wheel-windows-vs2017 || exit /B 1 + archery docker build --no-pull python-wheel-windows-vs2019 || exit /B 1 ) - archery docker run --no-build -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2017 + archery docker run --no-build -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} python-wheel-windows-vs2019 - uses: actions/upload-artifact@v3 with: @@ -77,5 +77,5 @@ jobs: shell: cmd run: | cd arrow - archery docker push python-wheel-windows-vs2017 + archery docker push python-wheel-windows-vs2019 {% endif %} diff --git a/docker-compose.yml b/docker-compose.yml index 5c84d24fd7df7..8a7223b57632f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -172,7 +172,7 @@ x-hierarchy: - python-wheel-manylinux-2-28 - python-wheel-manylinux-test-imports - python-wheel-manylinux-test-unittests - - python-wheel-windows-vs2017 + - python-wheel-windows-vs2019 - python-wheel-windows-test volumes: @@ -1098,19 +1098,19 @@ services: CHECK_UNITTESTS: "ON" command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow - python-wheel-windows-vs2017: - image: ${REPO}:python-${PYTHON}-wheel-windows-vs2017-vcpkg-${VCPKG}-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION} + python-wheel-windows-vs2019: + image: ${REPO}:python-${PYTHON}-wheel-windows-vs2019-vcpkg-${VCPKG}-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION} build: args: vcpkg: ${VCPKG} python: ${PYTHON} context: . 
- dockerfile: ci/docker/python-wheel-windows-vs2017.dockerfile + dockerfile: ci/docker/python-wheel-windows-vs2019.dockerfile # This should make the pushed images reusable, but the image gets rebuilt. # Uncomment if no local cache is available. # cache_from: - # - abrarov/msvc-2017:2.11.0 - # - ${REPO}:python-${PYTHON}-wheel-windows-vs2017-vcpkg-${VCPKG}-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION} + # - abrarov/msvc-2019:2.11.0 + # - ${REPO}:python-${PYTHON}-wheel-windows-vs2019-vcpkg-${VCPKG}-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION} volumes: - "${DOCKER_VOLUME_PREFIX}python-wheel-windows-clcache:C:/clcache" - type: bind @@ -1119,12 +1119,12 @@ services: command: arrow\\ci\\scripts\\python_wheel_windows_build.bat python-wheel-windows-test: - image: ${REPO}:python-${PYTHON}-wheel-windows-test-vs2017-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION} + image: ${REPO}:python-${PYTHON}-wheel-windows-test-vs2019-${PYTHON_WHEEL_WINDOWS_IMAGE_REVISION} build: args: python: ${PYTHON} context: . - dockerfile: ci/docker/python-wheel-windows-test-vs2017.dockerfile + dockerfile: ci/docker/python-wheel-windows-test-vs2019.dockerfile volumes: - "${DOCKER_VOLUME_PREFIX}python-wheel-windows-clcache:C:/clcache" - type: bind From 874e59670773bd0d52d9c6811483c00abc5ee736 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 6 Feb 2024 01:54:13 +0100 Subject: [PATCH 46/74] GH-39737: [Release][Docs] Update post release documentation task (#39762) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR updates the `dev/release/post-08-docs.sh` task so that - `DOCUMENTATION_OPTIONS.theme_switcher_version_match` changes from `""` to `"{previous_version}"` - `DOCUMENTATION_OPTIONS.show_version_warning_banner` changes from `false` to `true` for the documentation that is moved to a subfolder when a new major release is done. * Closes: #39737 Lead-authored-by: AlenkaF Co-authored-by: Alenka Frim Co-authored-by: Raúl Cumplido Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/release/post-08-docs.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dev/release/post-08-docs.sh b/dev/release/post-08-docs.sh index f18f7d10c73e6..4df574700e812 100755 --- a/dev/release/post-08-docs.sh +++ b/dev/release/post-08-docs.sh @@ -86,6 +86,21 @@ if [ "$is_major_release" = "yes" ] ; then fi git add docs git commit -m "[Website] Update documentations for ${version}" + +# Update DOCUMENTATION_OPTIONS.theme_switcher_version_match and +# DOCUMENTATION_OPTIONS.show_version_warning_banner +pushd docs/${previous_series} +find ./ \ + -type f \ + -exec \ + sed -i.bak \ + -e "s/DOCUMENTATION_OPTIONS.theme_switcher_version_match = '';/DOCUMENTATION_OPTIONS.theme_switcher_version_match = '${previous_version}';/g" \ + -e "s/DOCUMENTATION_OPTIONS.show_version_warning_banner = false/DOCUMENTATION_OPTIONS.show_version_warning_banner = true/g" \ + {} \; +find ./ -name '*.bak' -delete +popd +git add docs/${previous_series} +git commit -m "[Website] Update warning banner for ${previous_series}" git clean -d -f -x popd From 062c841836642ab95b1ffde031d271fffd29987d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 6 Feb 2024 10:56:16 +0900 Subject: [PATCH 47/74] GH-39057: [CI][C++][Go] Don't run jobs that use a self-hosted GitHub Actions Runner on fork (#39903) ### Rationale for this change If jobs that use a self-hosted GitHub Actions Runner on fork are submitted on fork, they will timeout eventually and report noisy failure notifications. ### What changes are included in this PR? 
We can't use `jobs..if` to reject jobs that use self-hosted GitHub Actions Runner because `jobs..if` is evaluated before `jobs..strategy.matrix`. https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idif > Note: The `jobs..if` condition is evaluated before > `jobs..strategy.matrix` is applied. We can use output `jobs.outputs` instead. See also: * https://docs.github.com/en/actions/using-jobs/defining-outputs-for-jobs * https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idoutputs ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #39057 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/cpp.yml | 74 +++++++++++++++++++++++++++------------ .github/workflows/go.yml | 65 +++++++++++++++++++++++++--------- 2 files changed, 99 insertions(+), 40 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 9fbad06692bd2..e9409f1cd6248 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -57,37 +57,65 @@ env: DOCKER_VOLUME_PREFIX: ".docker/" jobs: + docker-targets: + name: Docker targets + runs-on: ubuntu-latest + outputs: + targets: ${{ steps.detect-targets.outputs.targets }} + steps: + - name: Detect targets + id: detect-targets + run: | + echo "targets<> "$GITHUB_OUTPUT" + echo "[" >> "$GITHUB_OUTPUT" + cat <> "$GITHUB_OUTPUT" + { + "arch": "amd64", + "clang-tools": "14", + "image": "conda-cpp", + "llvm": "14", + "runs-on": "ubuntu-latest", + "simd-level": "AVX2", + "title": "AMD64 Conda C++ AVX2", + "ubuntu": "22.04" + }, + { + "arch": "amd64", + "clang-tools": "14", + "image": "ubuntu-cpp-sanitizer", + "llvm": "14", + "runs-on": "ubuntu-latest", + "title": "AMD64 Ubuntu 22.04 C++ ASAN UBSAN", + "ubuntu": "22.04" + } + JSON + if [ "$GITHUB_REPOSITORY_OWNER" = "apache" ]; then + echo "," >> "$GITHUB_OUTPUT" + cat <> "$GITHUB_OUTPUT" + { + "arch": "arm64v8", + "clang-tools": "10", + "image": "ubuntu-cpp", + "llvm": "10", + "runs-on": ["self-hosted", "arm", "linux"], + "title": "ARM64 Ubuntu 20.04 C++", + "ubuntu": "20.04" + } + JSON + fi + echo "]" >> "$GITHUB_OUTPUT" + echo "JSON" >> "$GITHUB_OUTPUT" + docker: name: ${{ matrix.title }} + needs: docker-targets runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 75 strategy: fail-fast: false matrix: - include: - - arch: amd64 - clang-tools: "14" - image: conda-cpp - llvm: "14" - runs-on: ubuntu-latest - simd-level: AVX2 - title: AMD64 Conda C++ AVX2 - ubuntu: "22.04" - - arch: amd64 - clang-tools: "14" - image: ubuntu-cpp-sanitizer - llvm: "14" - runs-on: ubuntu-latest - title: AMD64 Ubuntu 22.04 C++ ASAN UBSAN - ubuntu: "22.04" - - arch: arm64v8 - clang-tools: "10" - image: ubuntu-cpp - llvm: "10" - runs-on: ["self-hosted", "arm", "linux"] - title: ARM64 Ubuntu 20.04 C++ - ubuntu: "20.04" + include: ${{ fromJson(needs.docker-targets.outputs.targets) }} env: ARCH: ${{ matrix.arch }} ARROW_SIMD_LEVEL: ${{ matrix.simd-level }} diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index cd44e65e8811b..bbffab6704087 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -43,31 +43,62 @@ permissions: jobs: + docker-targets: + name: Docker targets + runs-on: ubuntu-latest + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + outputs: + targets: ${{ steps.detect-targets.outputs.targets }} + steps: + - name: Detect targets + id: detect-targets + run: | + echo "targets<> 
"$GITHUB_OUTPUT" + echo "[" >> "$GITHUB_OUTPUT" + cat <> "$GITHUB_OUTPUT" + { + "arch-label": "AMD64", + "arch": "amd64", + "go": "1.19", + "runs-on": "ubuntu-latest" + }, + { + "arch-label": "AMD64", + "arch": "amd64", + "go": "1.20", + "runs-on": "ubuntu-latest" + } + JSON + if [ "$GITHUB_REPOSITORY_OWNER" = "apache" ]; then + echo "," >> "$GITHUB_OUTPUT" + cat <> "$GITHUB_OUTPUT" + { + "arch-label": "ARM64", + "arch": "arm64v8", + "go": "1.19", + "runs-on": ["self-hosted", "arm", "linux"] + }, + { + "arch-label": "ARM64", + "arch": "arm64v8", + "go": "1.20", + "runs-on": ["self-hosted", "arm", "linux"] + } + JSON + fi + echo "]" >> "$GITHUB_OUTPUT" + echo "JSON" >> "$GITHUB_OUTPUT" + docker: name: ${{ matrix.arch-label }} Debian 11 Go ${{ matrix.go }} + needs: docker-targets runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 60 strategy: fail-fast: false matrix: - include: - - arch-label: AMD64 - arch: amd64 - go: 1.19 - runs-on: ubuntu-latest - - arch-label: AMD64 - arch: amd64 - go: '1.20' - runs-on: ubuntu-latest - - arch-label: ARM64 - arch: arm64v8 - go: 1.19 - runs-on: ["self-hosted", "arm", "linux"] - - arch-label: ARM64 - arch: arm64v8 - go: '1.20' - runs-on: ["self-hosted", "arm", "linux"] + include: ${{ fromJson(needs.docker-targets.outputs.targets) }} env: ARCH: ${{ matrix.arch }} GO: ${{ matrix.go }} From f38ae607983264dc52a938d1930916c73292a92e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 11:03:44 +0900 Subject: [PATCH 48/74] MINOR: [Java] Bump com.fasterxml.jackson:jackson-bom from 2.16.0 to 2.16.1 in /java (#39947) Bumps [com.fasterxml.jackson:jackson-bom](https://github.com/FasterXML/jackson-bom) from 2.16.0 to 2.16.1.
Commits
  • f70e9cf [maven-release-plugin] prepare release jackson-bom-2.16.1
  • 22a8c3a Prepare for 2.16.1 release
  • 4203816 back to snapshot deps
  • 4fb9d50 [maven-release-plugin] prepare for next development iteration
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.fasterxml.jackson:jackson-bom&package-manager=maven&previous-version=2.16.0&new-version=2.16.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index b2b300b2f3fed..1faeb46d02afc 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -36,7 +36,7 @@ 4.1.106.Final 1.60.0 3.23.1 - 2.16.0 + 2.16.1 3.3.6 23.5.26 1.11.3 From 1950f8000fa25368602b530dbec4b3d286aed819 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 11:04:08 +0900 Subject: [PATCH 49/74] MINOR: [Java] Bump org.cyclonedx:cyclonedx-maven-plugin from 2.7.10 to 2.7.11 in /java (#39948) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.cyclonedx:cyclonedx-maven-plugin](https://github.com/CycloneDX/cyclonedx-maven-plugin) from 2.7.10 to 2.7.11.
Release notes

Sourced from org.cyclonedx:cyclonedx-maven-plugin's releases.

2.7.11

🚀 New features and improvements

📦 Dependency updates

  • define plugin-tools.version property (#453) @​hboutemy
  • Bump org.apache.maven.plugin-tools:maven-plugin-annotations from 3.10.2 to 3.11.0 (#451) @​dependabot
  • Bump org.apache.maven.plugins:maven-plugin-report-plugin from 3.10.2 to 3.11.0 (#450) @​dependabot
  • Bump org.apache.maven.plugins:maven-plugin-plugin from 3.10.2 to 3.11.0 (#449) @​dependabot
  • Bump org.apache.maven.plugins:maven-compiler-plugin from 3.11.0 to 3.12.1 (#447) @​dependabot
  • Bump org.apache.maven.plugins:maven-plugin-plugin from 3.10.1 to 3.10.2 (#445) @​dependabot
  • Bump org.apache.maven.plugins:maven-project-info-reports-plugin from 3.4.5 to 3.5.0 (#442) @​dependabot
  • Bump org.apache.commons:commons-lang3 from 3.13.0 to 3.14.0 (#443) @​dependabot
  • Bump org.apache.maven.plugin-tools:maven-plugin-annotations from 3.10.1 to 3.10.2 (#444) @​dependabot
  • Bump org.junit:junit-bom from 5.10.0 to 5.10.1 (#422) @​dependabot
  • Bump org.apache.maven.plugins:maven-plugin-report-plugin from 3.10.1 to 3.10.2 (#424) @​dependabot
  • Bump org.apache.maven.plugins:maven-javadoc-plugin from 3.6.0 to 3.6.3 (#438) @​dependabot
  • Bump actions/setup-java from 3 to 4 (#437) @​dependabot
  • Bump org.apache.maven.plugins:maven-plugin-report-plugin from 3.9.0 to 3.10.1 (#417) @​dependabot
Commits
  • 349fe7c [maven-release-plugin] prepare release cyclonedx-maven-plugin-2.7.11
  • 2d130a0 rename convert methods to explicit project vs dependency
  • 051be8e cleanup unused code
  • d0e6cb5 test dependency type=zip for #431 (reverts #9)
  • 46837cd Update DefaultModelConverter.java to support Zip files
  • dc90b21 define plugin-tools.version property
  • 8836cbd Add support for custom external references (#428)
  • 86410aa Bump org.apache.maven.plugin-tools:maven-plugin-annotations
  • 4d71b50 Bump org.apache.maven.plugins:maven-plugin-report-plugin
  • 70aae8e Bump org.apache.maven.plugins:maven-plugin-plugin from 3.10.2 to 3.11.0
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.cyclonedx:cyclonedx-maven-plugin&package-manager=maven&previous-version=2.7.10&new-version=2.7.11)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/maven/pom.xml | 2 +- java/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 7fdca7db7b8d8..9842777c15495 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -271,7 +271,7 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.10 + 2.7.11 package diff --git a/java/pom.xml b/java/pom.xml index 1faeb46d02afc..e928960182ab2 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -364,7 +364,7 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.10 + 2.7.11 package From c7a166fc5aeec3f1b6e5d68cc7746b228a8dad04 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 11:04:37 +0900 Subject: [PATCH 50/74] MINOR: [Java] Bump org.apache.maven.plugins:maven-project-info-reports-plugin from 3.0.0 to 3.5.0 in /java (#39949) Bumps [org.apache.maven.plugins:maven-project-info-reports-plugin](https://github.com/apache/maven-project-info-reports-plugin) from 3.0.0 to 3.5.0.
Commits
  • 28ac4b5 [maven-release-plugin] prepare release maven-project-info-reports-plugin-3.5.0
  • 9c4fc33 Remove unneeded incompatibility notice
  • 482ea62 Fix formatting
  • 60cfdea [MPIR-453] Replace Commons IO in favor of standard APIs
  • 4d94edc [MPIR-446] Update to Maven SCM 2.0.
  • 91a065b [MPIR-452] Upgrade to Parent 41
  • ceac0bf Consistently use MavenReport#getReportOutputDirectory()
  • c16ec94 [MNG-7416] Simplify Boolean expressions and returns (#63)
  • abd0e76 Fix style value
  • 18aedbb Reduce IT runtime by invoking goal directly
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-project-info-reports-plugin&package-manager=maven&previous-version=3.0.0&new-version=3.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/maven/pom.xml | 2 +- java/pom.xml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 9842777c15495..c2b13119fc440 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -333,7 +333,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.0.0 + 3.5.0 org.apache.maven.plugins diff --git a/java/pom.xml b/java/pom.xml index e928960182ab2..258e45a519c59 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -395,7 +395,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.0.0 + 3.5.0 org.apache.maven.plugins @@ -598,7 +598,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.0.0 + 3.5.0 org.apache.maven.plugins @@ -803,7 +803,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.0.0 + 3.5.0 org.apache.maven.plugins From 672238ff6352fa388b54182d8ae1667f9e99c327 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 11:05:00 +0900 Subject: [PATCH 51/74] MINOR: [Java] Bump io.grpc:grpc-bom from 1.60.0 to 1.61.1 in /java (#39950) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [io.grpc:grpc-bom](https://github.com/grpc/grpc-java) from 1.60.0 to 1.61.1.
Release notes

Sourced from io.grpc:grpc-bom's releases.

v1.61.1

Bug Fixes

xds: Fix a bug in WeightedRoundRobinLoadBalancer policy that could raise NullPointerException and further cause channel panic when picking a subchannel. This bug can only be triggered when connection can not be established and the channel reports TRANSIENT_FAILURE state. (#10868)

v1.61.0

API Changes

  • Remove unused experimental API ManagedChannelBuilder.enableFullStreamDecompression (#10744)
  • api: Deprecate LoadBalancer.EMPTY_PICKER added in 1.58.0 in favor of FixedResultPicker (860b5cb1f)

New Features

  • binder: Experimental support for asynchronous security policies (#10566)

Improvements

  • core: reduce CompositeReadableBuffer allocation (#3279)
  • core: Improve error message clarity when a channel leak is detected (201893f5e)
  • util: use shared index across round_robin pickers (dca89b25b). This makes its implementation more similar to weighted_round_robin.
  • xds: Implement ADS stream flow control mechanism (#10674). This limits the maximum memory consumed if the control plane sends updates more rapidly than they can be processed.

Bug Fixes

  • core: Check outbound maximum message size for the compressed size in addition to the already-checked uncompressed size (#10739). Fixed the status code to be RESOURCE_EXHAUSTED instead of UNKNOWN.
  • util: Fix NPE when multiple addresses are in an address group for petiole load balancer policies (#10769)
  • util: Disable publishing of fixtures (8ac43dd81). The Gradle test fixtures are for use by grpc-java's internal tests.
  • okhttp: Ignore known conscrypt socket close issue (#10812). This stops an exception from being thrown when a known Conscrypt synchronization issue happens.

Dependencies

  • Drop support for Bazel 5 (55a9c012c). Bazel 7 is available, and Protobuf has already dropped support for Bazel 5.
  • Change many compile deps to runtime deps (d6830d7f9). This reduces the transitive classes "leaked" into the compile classpath. In particular, grpc-core (io.grpc.internal) will be less frequently included transitively at compile time.
  • Upgrade dependencies (c985797d9)
    • Protobuf to 3.25.1
    • auto-value-annotations to 1.10.4
    • error_prone_annotations to 2.23.0
    • proto-google-common-protos to 2.29.0
    • google-cloud-logging to 3.15.14
    • guava to 32.1.3-android
    • okio to 3.4.0

Acknowledgements

v1.60.2

Bug Fixes

xds: Fix a bug in WeightedRoundRobinLoadBalancer policy that could raise NullPointerException and further cause channel panic when picking a subchannel. This bug can only be triggered when connection can not be established and the channel reports TRANSIENT_FAILURE state. (#10868)

v1.60.1

Bug Fixes

  • util: Fix NPE when multiple addresses in an address group for petiole load balancer policies (#10770)
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=io.grpc:grpc-bom&package-manager=maven&previous-version=1.60.0&new-version=1.61.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 258e45a519c59..6442987f5a192 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -34,7 +34,7 @@ 2.0.11 33.0.0-jre 4.1.106.Final - 1.60.0 + 1.61.1 3.23.1 2.16.1 3.3.6 From 0993b369c4b91d81a17166d1427e7c26cd9beee4 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Mon, 5 Feb 2024 21:35:34 -0500 Subject: [PATCH 52/74] GH-39900: [Java][CI] To upload Maven and Memory Netty Buffer Patch into Apache Nightly repository (#39901) ### Rationale for this change To upload Maven and Memory Netty Buffer Patch into Apache Nightly repository ### What changes are included in this PR? Upload Maven and Memory Netty Buffer Patch into Apache Nightly repository ### Are these changes tested? Needed to run https://github.com/apache/arrow/actions/workflows/java_nightly.yml ### Are there any user-facing changes? No * Closes: #39900 Authored-by: david dali susanibar arce Signed-off-by: Sutou Kouhei --- dev/tasks/tasks.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 0f8c58391fa66..cf04d29715306 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -748,6 +748,10 @@ tasks: - arrow-jdbc-{no_rc_snapshot_version}-tests.jar - arrow-jdbc-{no_rc_snapshot_version}.jar - arrow-jdbc-{no_rc_snapshot_version}.pom + - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.json + - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.xml + - arrow-maven-plugins-{no_rc_snapshot_version}-src.zip + - arrow-maven-plugins-{no_rc_snapshot_version}.pom - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.json - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.xml - arrow-memory-core-{no_rc_snapshot_version}-javadoc.jar @@ -762,6 +766,13 @@ tasks: - arrow-memory-netty-{no_rc_snapshot_version}-tests.jar - arrow-memory-netty-{no_rc_snapshot_version}.jar - arrow-memory-netty-{no_rc_snapshot_version}.pom + - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-cyclonedx.json + - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-cyclonedx.xml + - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-javadoc.jar + - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-sources.jar + - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-tests.jar + - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}.jar + - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}.pom - arrow-memory-unsafe-{no_rc_snapshot_version}-cyclonedx.json - arrow-memory-unsafe-{no_rc_snapshot_version}-cyclonedx.xml - arrow-memory-unsafe-{no_rc_snapshot_version}-javadoc.jar @@ -839,6 +850,13 @@ tasks: - flight-sql-jdbc-driver-{no_rc_snapshot_version}-tests.jar - flight-sql-jdbc-driver-{no_rc_snapshot_version}.jar - flight-sql-jdbc-driver-{no_rc_snapshot_version}.pom + - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.json + - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.xml + - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-javadoc.jar + - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-sources.jar + - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-src.zip + - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.jar + - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.pom ############################## NuGet packages 
############################### From cd5faafb0c811d5985156c1fd1aecd1aa7130e9f Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 6 Feb 2024 13:53:54 +0900 Subject: [PATCH 53/74] GH-39955: [C++] Use make -j1 to install bundled bzip2 (#39956) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change It seems that parallel "make install" isn't stable with "-G 'Unix Makefiles'" ("read jobs pipe: Bad file descriptor. Stop." is the important part): [ 19%] Performing install step for 'bzip2_ep' CMake Error at /tmp/Rtmp5v99SJ/file70b591df48f/bzip2_ep-prefix/src/bzip2_ep-stamp/bzip2_ep-install-RELEASE.cmake:37 (message): Command failed: 2 '/bin/make' 'install' 'PREFIX=/tmp/Rtmp5v99SJ/file70b591df48f/bzip2_ep-install' 'CC=/bin/gcc' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/r-base-4.3.2=. -fstack-protector-strong -fstack-clash-protection -Wformat -Werror=format-security -fcf-protection -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC' 'AR=/bin/ar' 'RANLIB=/bin/ranlib' See also /tmp/Rtmp5v99SJ/file70b591df48f/bzip2_ep-prefix/src/bzip2_ep-stamp/bzip2_ep-install-*.log -- stdout output is: -- stderr output is: make[3]: *** read jobs pipe: Bad file descriptor. Stop. make[3]: *** Waiting for unfinished jobs.... bzip2.c: In function ‘applySavedFileAttrToOutputFile’: bzip2.c:1073:11: warning: ignoring return value of ‘fchown’ declared with attribute ‘warn_unused_result’ [-Wunused-result] 1073 | (void) fchown ( fd, fileMetaInfo.st_uid, fileMetaInfo.st_gid ); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CMake Error at /tmp/Rtmp5v99SJ/file70b591df48f/bzip2_ep-prefix/src/bzip2_ep-stamp/bzip2_ep-install-RELEASE.cmake:47 (message): Stopping after outputting logs. make[2]: *** [CMakeFiles/bzip2_ep.dir/build.make:104: bzip2_ep-prefix/src/bzip2_ep-stamp/bzip2_ep-install] Error 1 make[1]: *** [CMakeFiles/Makefile2:1205: CMakeFiles/bzip2_ep.dir/all] Error 2 make[1]: *** Waiting for unfinished jobs.... ### What changes are included in this PR? Force to disable parallel processing for `make install`. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * Closes: #39955 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 0238c26c0fb51..b16ee07756013 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2629,7 +2629,7 @@ macro(build_bzip2) BUILD_IN_SOURCE 1 BUILD_COMMAND ${MAKE} libbz2.a ${MAKE_BUILD_ARGS} ${BZIP2_EXTRA_ARGS} - INSTALL_COMMAND ${MAKE} install PREFIX=${BZIP2_PREFIX} + INSTALL_COMMAND ${MAKE} install -j1 PREFIX=${BZIP2_PREFIX} ${BZIP2_EXTRA_ARGS} INSTALL_DIR ${BZIP2_PREFIX} URL ${ARROW_BZIP2_SOURCE_URL} From a6e577d031d20a1a7d3dd01536b9a77db5d1bff8 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 6 Feb 2024 16:19:03 +0100 Subject: [PATCH 54/74] GH-39857: [C++] Improve error message for "chunker out of sync" condition (#39892) ### Rationale for this change When writing the CSV reader, we thought that the parser not finding the same line limits as the chunker should never happen, hence the terse "chunker out of sync" error message. It turns out that, if the input contains multiline cell values and the `newlines_in_values` option was not enabled, the chunker can happily delimit a block on a newline that's inside a quoted string. 
The parser will then see truncated data and will stop parsing, yielding a parsed size that's smaller than the first block (see added comment in the code). ### What changes are included in this PR? * Add some parser tests that showcase the condition encountered in GH-39857 * Improve error message to guide users towards the solution ### Are these changes tested? There's no functional change, the error message itself isn't tested. ### Are there any user-facing changes? No. * Closes: #39857 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/csv/parser_test.cc | 22 +++++++++++++++++++++ cpp/src/arrow/csv/reader.cc | 34 +++++++++++++++++++++++++++----- python/pyarrow/tests/test_csv.py | 25 +++++++++++++++++++++++ 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/csv/parser_test.cc b/cpp/src/arrow/csv/parser_test.cc index 960a69c59db5d..dd3d025202018 100644 --- a/cpp/src/arrow/csv/parser_test.cc +++ b/cpp/src/arrow/csv/parser_test.cc @@ -175,6 +175,13 @@ void AssertParsePartial(BlockParser& parser, const std::string& str, ASSERT_EQ(parsed_size, expected_size); } +void AssertParsePartial(BlockParser& parser, const std::vector& data, + uint32_t expected_size) { + uint32_t parsed_size = static_cast(-1); + ASSERT_OK(parser.Parse(data, &parsed_size)); + ASSERT_EQ(parsed_size, expected_size); +} + void AssertLastRowEq(const BlockParser& parser, const std::vector& expected) { std::vector values; @@ -376,6 +383,21 @@ TEST(BlockParser, TruncatedData) { } } +TEST(BlockParser, TruncatedDataViews) { + // The BlockParser API mandates that, when passing a vector of views, + // only the last view may be a truncated CSV block. + // In the current implementation, receiving a truncated non-last view + // simply stops parsing after that view. + BlockParser parser(ParseOptions::Defaults(), /*num_cols=*/3); + AssertParsePartial(parser, Views({"a,b,", "c\n"}), 0); + AssertParsePartial(parser, Views({"a,b,c\nd,", "e,f\n"}), 6); + + // More sophisticated: non-last block ends on some newline inside a quoted string + // (terse reproducer of gh-39857) + AssertParsePartial(parser, Views({"a,b,\"c\n", "\"\n"}), 0); + AssertParsePartial(parser, Views({"a,b,c\n\"d\n", "\",e,f\n"}), 6); +} + TEST(BlockParser, Final) { // Tests for ParseFinal() BlockParser parser(ParseOptions::Defaults()); diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index 332fad054fea3..1ac25e290a814 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -261,11 +261,10 @@ class SerialBlockReader : public BlockReader { auto consume_bytes = [this, bytes_before_buffer, next_buffer](int64_t nbytes) -> Status { DCHECK_GE(nbytes, 0); - auto offset = nbytes - bytes_before_buffer; - if (offset < 0) { - // Should not happen - return Status::Invalid("CSV parser got out of sync with chunker"); - } + int64_t offset = nbytes - bytes_before_buffer; + // All data before the buffer should have been consumed. + // This is checked in Parse() and BlockParsingOperator::operator(). + DCHECK_GE(offset, 0); partial_ = SliceBuffer(buffer_, offset); buffer_ = next_buffer; return Status::OK(); @@ -400,6 +399,7 @@ class BlockParsingOperator { count_rows_(first_row >= 0), num_rows_seen_(first_row) {} + // TODO: this is almost entirely the same as ReaderMixin::Parse(). Refactor? 
Result operator()(const CSVBlock& block) { constexpr int32_t max_num_rows = std::numeric_limits::max(); auto parser = std::make_shared( @@ -427,9 +427,24 @@ class BlockParsingOperator { } else { RETURN_NOT_OK(parser->Parse(views, &parsed_size)); } + + // `partial + completion` should have been entirely consumed. + const int64_t bytes_before_buffer = block.partial->size() + block.completion->size(); + if (static_cast(parsed_size) < bytes_before_buffer) { + // This can happen if `newlines_in_values` is not enabled and + // `partial + completion` ends with a newline inside a quoted string. + // In this case, the BlockParser stops at the truncated data in the first + // block (see gh-39857). + return Status::Invalid( + "CSV parser got out of sync with chunker. This can mean the data file " + "contains cell values spanning multiple lines; please consider enabling " + "the option 'newlines_in_values'."); + } + if (count_rows_) { num_rows_seen_ += parser->total_num_rows(); } + RETURN_NOT_OK(block.consume_bytes(parsed_size)); return ParsedBlock{std::move(parser), block.block_index, static_cast(parsed_size) + block.bytes_skipped}; @@ -738,6 +753,15 @@ class ReaderMixin { } else { RETURN_NOT_OK(parser->Parse(views, &parsed_size)); } + // See BlockParsingOperator for explanation. + const int64_t bytes_before_buffer = partial->size() + completion->size(); + if (static_cast(parsed_size) < bytes_before_buffer) { + return Status::Invalid( + "CSV parser got out of sync with chunker. This can mean the data file " + "contains cell values spanning multiple lines; please consider enabling " + "the option 'newlines_in_values'."); + } + if (count_rows_) { num_rows_seen_ += parser->total_num_rows(); } diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index 31f24187e3b37..bc1dd8a09a768 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -667,6 +667,31 @@ def row_num(x): 'b': ["e", "j"], } + def test_chunker_out_of_sync(self): + # GH-39892: if there are newlines in values, the parser may become + # out of sync with the chunker. In this case, we try to produce an + # informative error message. + rows = b"""a,b,c\nd,e,"f\n"\ng,h,i\n""" + expected = { + 'a': ["d", "g"], + 'b': ["e", "h"], + 'c': ["f\n", "i"], + } + for block_size in range(8, 15): + # Sanity check: parsing works with newlines_in_values=True + d = self.read_bytes( + rows, parse_options=ParseOptions(newlines_in_values=True), + read_options=ReadOptions(block_size=block_size)).to_pydict() + assert d == expected + # With these block sizes, a block would end on the physical newline + # inside the quoted cell value, leading to a mismatch between + # CSV chunker and parser. + for block_size in range(8, 11): + with pytest.raises(ValueError, + match="cell values spanning multiple lines"): + self.read_bytes( + rows, read_options=ReadOptions(block_size=block_size)) + class BaseCSVTableRead(BaseTestCSV): From 0a05626f08836152526babf103aec95d0e4ec507 Mon Sep 17 00:00:00 2001 From: Jeffrey Vo Date: Thu, 8 Feb 2024 00:01:46 +1100 Subject: [PATCH 55/74] MINOR: [Rust][Docs] Update Rust FlightSQL status doc (#39959) ### Rationale for this change Updating arrow-rs FlightSQL support status on site: https://arrow.apache.org/docs/status.html#flight-sql arrow-rs issue: https://github.com/apache/arrow-rs/issues/4337 ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? 
--- docs/source/status.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/source/status.rst b/docs/source/status.rst index 11dd9c2c2965c..a0375585dbee2 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -257,9 +257,9 @@ support/not support individual features. +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | CancelQuery | ✓ | ✓ | | | | | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| ClosePreparedStatement | ✓ | ✓ | ✓ | | ✓ | | | | +| ClosePreparedStatement | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| CreatePreparedStatement | ✓ | ✓ | ✓ | | ✓ | | | | +| CreatePreparedStatement | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | CreatePreparedSubstraitPlan | ✓ | ✓ | | | | | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -267,35 +267,35 @@ support/not support individual features. +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | EndTransaction | ✓ | ✓ | | | | | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetCatalogs | ✓ | ✓ | ✓ | | ✓ | | | | +| GetCatalogs | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetCrossReference | ✓ | ✓ | ✓ | | ✓ | | | | +| GetCrossReference | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetDbSchemas | ✓ | ✓ | ✓ | | ✓ | | | | +| GetDbSchemas | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetExportedKeys | ✓ | ✓ | ✓ | | ✓ | | | | +| GetExportedKeys | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetImportedKeys | ✓ | ✓ | ✓ | | ✓ | | | | +| GetImportedKeys | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetPrimaryKeys | ✓ | ✓ | ✓ | | ✓ | | | | +| GetPrimaryKeys | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetSqlInfo | ✓ | ✓ | ✓ | | ✓ | | | | +| GetSqlInfo | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetTables | ✓ | ✓ | ✓ | | ✓ | | | | +| GetTables | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetTableTypes | ✓ | ✓ | ✓ | | ✓ | | | | +| GetTableTypes | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| GetXdbcTypeInfo | ✓ | ✓ | ✓ | | ✓ | | | | +| GetXdbcTypeInfo | ✓ | ✓ | ✓ | | ✓ | ✓ | | | 
+--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| PreparedStatementQuery | ✓ | ✓ | ✓ | | ✓ | | | | +| PreparedStatementQuery | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| PreparedStatementUpdate | ✓ | ✓ | ✓ | | ✓ | | | | +| PreparedStatementUpdate | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | StatementSubstraitPlan | ✓ | ✓ | | | | | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| StatementQuery | ✓ | ✓ | ✓ | | ✓ | | | | +| StatementQuery | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| StatementUpdate | ✓ | ✓ | ✓ | | ✓ | | | | +| StatementUpdate | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ .. seealso:: From c9f6e04323a0b714487a0f707b46fc3c55b909e0 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 8 Feb 2024 02:32:16 +1300 Subject: [PATCH 56/74] GH-24834: [C#] Support writing compressed IPC data (#39871) ### Rationale for this change This allows using compression when writing IPC streams and files with the Arrow .NET library ### What changes are included in this PR? * Adds a compress method to the `ICompressionCodec` interface and implements this for Zstd and LZ4Frame in the `Apache.Arrow.Compression` package * Adds new compression related options to `IpcOptions` * Implements buffer compression in `ArrowStreamWriter` ### Are these changes tested? Yes, new unit tests have been added ### Are there any user-facing changes? Yes, this is a new user-facing feature and the `status.rst` and `csharp/README` files have been updated * Closes: #24834 Authored-by: Adam Reeve Signed-off-by: Curt Hagenlocher --- csharp/README.md | 8 +- .../CompressionCodecFactory.cs | 9 +- .../Lz4CompressionCodec.cs | 32 ++- .../ZstdCompressionCodec.cs | 22 ++- .../src/Apache.Arrow/Ipc/ArrowFileWriter.cs | 10 +- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 117 +++++++++-- .../src/Apache.Arrow/Ipc/ICompressionCodec.cs | 15 ++ .../Ipc/ICompressionCodecFactory.cs | 21 ++ csharp/src/Apache.Arrow/Ipc/IpcOptions.cs | 17 ++ .../Apache.Arrow.Compression.Tests.csproj | 1 + .../ArrowFileWriterTests.cs | 147 ++++++++++++++ .../ArrowStreamWriterTests.cs | 184 ++++++++++++++++++ .../Apache.Arrow.IntegrationTest.csproj | 1 + .../IntegrationCommand.cs | 10 +- dev/archery/archery/integration/runner.py | 1 - docs/source/status.rst | 4 +- 16 files changed, 564 insertions(+), 35 deletions(-) create mode 100644 csharp/test/Apache.Arrow.Compression.Tests/ArrowFileWriterTests.cs create mode 100644 csharp/test/Apache.Arrow.Compression.Tests/ArrowStreamWriterTests.cs diff --git a/csharp/README.md b/csharp/README.md index 6e6ed9c756873..b36eb899db2d5 100644 --- a/csharp/README.md +++ b/csharp/README.md @@ -115,10 +115,10 @@ for currently available features. ### Compression -- Buffer compression is not supported when writing IPC files or streams -- Buffer decompression is supported, but requires installing the `Apache.Arrow.Compression` package, - and passing an `Apache.Arrow.Compression.CompressionCodecFactory` instance to the - `ArrowFileReader` or `ArrowStreamReader` constructor. 
+- Buffer compression and decompression is supported, but requires installing the `Apache.Arrow.Compression` package. + When reading compressed data, you must pass an `Apache.Arrow.Compression.CompressionCodecFactory` instance to the + `ArrowFileReader` or `ArrowStreamReader` constructor, and when writing compressed data a + `CompressionCodecFactory` must be set in the `IpcOptions`. Alternatively, a custom implementation of `ICompressionCodecFactory` can be used. ## Not Implemented diff --git a/csharp/src/Apache.Arrow.Compression/CompressionCodecFactory.cs b/csharp/src/Apache.Arrow.Compression/CompressionCodecFactory.cs index 3e0a537a89a8f..4bfcdf6544f9d 100644 --- a/csharp/src/Apache.Arrow.Compression/CompressionCodecFactory.cs +++ b/csharp/src/Apache.Arrow.Compression/CompressionCodecFactory.cs @@ -24,11 +24,16 @@ namespace Apache.Arrow.Compression public sealed class CompressionCodecFactory : ICompressionCodecFactory { public ICompressionCodec CreateCodec(CompressionCodecType compressionCodecType) + { + return CreateCodec(compressionCodecType, null); + } + + public ICompressionCodec CreateCodec(CompressionCodecType compressionCodecType, int? compressionLevel) { return compressionCodecType switch { - CompressionCodecType.Lz4Frame => Lz4CompressionCodec.Instance, - CompressionCodecType.Zstd => new ZstdCompressionCodec(), + CompressionCodecType.Lz4Frame => new Lz4CompressionCodec(compressionLevel), + CompressionCodecType.Zstd => new ZstdCompressionCodec(compressionLevel), _ => throw new NotImplementedException($"Compression type {compressionCodecType} is not supported") }; } diff --git a/csharp/src/Apache.Arrow.Compression/Lz4CompressionCodec.cs b/csharp/src/Apache.Arrow.Compression/Lz4CompressionCodec.cs index ebbcfbc3e095f..df19c16a30213 100644 --- a/csharp/src/Apache.Arrow.Compression/Lz4CompressionCodec.cs +++ b/csharp/src/Apache.Arrow.Compression/Lz4CompressionCodec.cs @@ -14,17 +14,35 @@ // limitations under the License. using System; +using System.IO; using Apache.Arrow.Ipc; +using K4os.Compression.LZ4; using K4os.Compression.LZ4.Streams; namespace Apache.Arrow.Compression { internal sealed class Lz4CompressionCodec : ICompressionCodec { - /// - /// Singleton instance, used as this class doesn't need to be disposed and has no state - /// - public static readonly Lz4CompressionCodec Instance = new Lz4CompressionCodec(); + private readonly LZ4EncoderSettings _settings = null; + + public Lz4CompressionCodec(int? 
compressionLevel = null) + { + if (compressionLevel.HasValue) + { + if (Enum.IsDefined(typeof(LZ4Level), compressionLevel)) + { + _settings = new LZ4EncoderSettings + { + CompressionLevel = (LZ4Level) compressionLevel, + }; + } + else + { + throw new ArgumentException( + $"Invalid LZ4 compression level ({compressionLevel})", nameof(compressionLevel)); + } + } + } public int Decompress(ReadOnlyMemory source, Memory destination) { @@ -32,6 +50,12 @@ public int Decompress(ReadOnlyMemory source, Memory destination) return decoder.ReadManyBytes(destination.Span); } + public void Compress(ReadOnlyMemory source, Stream destination) + { + using var encoder = LZ4Frame.Encode(destination, _settings, leaveOpen: true); + encoder.WriteManyBytes(source.Span); + } + public void Dispose() { } diff --git a/csharp/src/Apache.Arrow.Compression/ZstdCompressionCodec.cs b/csharp/src/Apache.Arrow.Compression/ZstdCompressionCodec.cs index 92c2e65371612..cc340a7cd1b9f 100644 --- a/csharp/src/Apache.Arrow.Compression/ZstdCompressionCodec.cs +++ b/csharp/src/Apache.Arrow.Compression/ZstdCompressionCodec.cs @@ -14,6 +14,7 @@ // limitations under the License. using System; +using System.IO; using Apache.Arrow.Ipc; using ZstdSharp; @@ -22,10 +23,21 @@ namespace Apache.Arrow.Compression internal sealed class ZstdCompressionCodec : ICompressionCodec { private readonly Decompressor _decompressor; + private readonly Compressor _compressor; - public ZstdCompressionCodec() + public ZstdCompressionCodec(int? compressionLevel = null) { + if (compressionLevel.HasValue && + (compressionLevel.Value < Compressor.MinCompressionLevel || + compressionLevel.Value > Compressor.MaxCompressionLevel)) + { + throw new ArgumentException( + $"Zstd compression level must be between {Compressor.MinCompressionLevel} and {Compressor.MaxCompressionLevel}", + nameof(compressionLevel)); + } + _decompressor = new Decompressor(); + _compressor = new Compressor(compressionLevel ?? 
Compressor.DefaultCompressionLevel); } public int Decompress(ReadOnlyMemory source, Memory destination) @@ -33,9 +45,17 @@ public int Decompress(ReadOnlyMemory source, Memory destination) return _decompressor.Unwrap(source.Span, destination.Span); } + public void Compress(ReadOnlyMemory source, Stream destination) + { + using var compressor = new CompressionStream( + destination, _compressor, preserveCompressor: true, leaveOpen: true); + compressor.Write(source.Span); + } + public void Dispose() { _decompressor.Dispose(); + _compressor.Dispose(); } } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs index 547fa800ec71e..a643012bab1a2 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs @@ -20,6 +20,7 @@ using System.IO; using System.Threading; using System.Threading.Tasks; +using Apache.Arrow.Memory; namespace Apache.Arrow.Ipc { @@ -37,12 +38,17 @@ public ArrowFileWriter(Stream stream, Schema schema) } public ArrowFileWriter(Stream stream, Schema schema, bool leaveOpen) - : this(stream, schema, leaveOpen, options: null) + : this(stream, schema, leaveOpen, options: null, allocator: null) { } public ArrowFileWriter(Stream stream, Schema schema, bool leaveOpen, IpcOptions options) - : base(stream, schema, leaveOpen, options) + : this(stream, schema, leaveOpen, options, allocator: null) + { + } + + public ArrowFileWriter(Stream stream, Schema schema, bool leaveOpen, IpcOptions options, MemoryAllocator allocator) + : base(stream, schema, leaveOpen, options, allocator) { if (!stream.CanWrite) { diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 07d1dcfdb171d..b002f8c8b1578 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -22,6 +22,7 @@ using System.Threading; using System.Threading.Tasks; using Apache.Arrow.Arrays; +using Apache.Arrow.Memory; using Apache.Arrow.Types; using Google.FlatBuffers; @@ -29,7 +30,7 @@ namespace Apache.Arrow.Ipc { public class ArrowStreamWriter : IDisposable { - internal class ArrowRecordBatchFlatBufferBuilder : + private class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -81,14 +82,21 @@ public Buffer(ArrowBuffer buffer, int offset) } private readonly List _buffers; + private readonly ICompressionCodec _compressionCodec; + private readonly MemoryAllocator _allocator; + private readonly MemoryStream _compressionStream; public IReadOnlyList Buffers => _buffers; public List VariadicCounts { get; private set; } public int TotalLength { get; private set; } - public ArrowRecordBatchFlatBufferBuilder() + public ArrowRecordBatchFlatBufferBuilder( + ICompressionCodec compressionCodec, MemoryAllocator allocator, MemoryStream compressionStream) { + _compressionCodec = compressionCodec; + _compressionStream = compressionStream; + _allocator = allocator; _buffers = new List(); TotalLength = 0; } @@ -238,11 +246,50 @@ private void CreateBuffers(PrimitiveArray array) private Buffer CreateBuffer(ArrowBuffer buffer) { int offset = TotalLength; + const int UncompressedLengthSize = 8; - int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(buffer.Length)); + ArrowBuffer bufferToWrite; + if (_compressionCodec == null) + { + bufferToWrite = buffer; + } + else if (buffer.Length == 0) + { + // Write zero length and skip compression + var uncompressedLengthBytes = 
_allocator.Allocate(UncompressedLengthSize); + BinaryPrimitives.WriteInt64LittleEndian(uncompressedLengthBytes.Memory.Span, 0); + bufferToWrite = new ArrowBuffer(uncompressedLengthBytes); + } + else + { + // See format/Message.fbs, and the BUFFER BodyCompressionMethod for documentation on how + // compressed buffers are stored. + _compressionStream.Seek(0, SeekOrigin.Begin); + _compressionStream.SetLength(0); + _compressionCodec.Compress(buffer.Memory, _compressionStream); + if (_compressionStream.Length < buffer.Length) + { + var newBuffer = _allocator.Allocate((int) _compressionStream.Length + UncompressedLengthSize); + BinaryPrimitives.WriteInt64LittleEndian(newBuffer.Memory.Span, buffer.Length); + _compressionStream.Seek(0, SeekOrigin.Begin); + _compressionStream.ReadFullBuffer(newBuffer.Memory.Slice(UncompressedLengthSize)); + bufferToWrite = new ArrowBuffer(newBuffer); + } + else + { + // If the compressed buffer is larger than the uncompressed buffer, use the uncompressed + // buffer instead, and indicate this by setting the uncompressed length to -1 + var newBuffer = _allocator.Allocate(buffer.Length + UncompressedLengthSize); + BinaryPrimitives.WriteInt64LittleEndian(newBuffer.Memory.Span, -1); + buffer.Memory.CopyTo(newBuffer.Memory.Slice(UncompressedLengthSize)); + bufferToWrite = new ArrowBuffer(newBuffer); + } + } + + int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(bufferToWrite.Length)); TotalLength += paddedLength; - return new Buffer(buffer, offset); + return new Buffer(bufferToWrite, offset); } public void Visit(IArrowArray array) @@ -269,6 +316,9 @@ public void Visit(IArrowArray array) private readonly bool _leaveOpen; private readonly IpcOptions _options; + private readonly MemoryAllocator _allocator; + // Reuse a single memory stream for writing compressed data to, to reduce memory allocations + private readonly MemoryStream _compressionStream = new MemoryStream(); private protected const Flatbuf.MetadataVersion CurrentMetadataVersion = Flatbuf.MetadataVersion.V5; @@ -285,15 +335,21 @@ public ArrowStreamWriter(Stream baseStream, Schema schema) } public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen) - : this(baseStream, schema, leaveOpen, options: null) + : this(baseStream, schema, leaveOpen, options: null, allocator: null) { } public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen, IpcOptions options) + : this(baseStream, schema, leaveOpen, options, allocator: null) + { + } + + public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen, IpcOptions options, MemoryAllocator allocator) { BaseStream = baseStream ?? throw new ArgumentNullException(nameof(baseStream)); Schema = schema ?? throw new ArgumentNullException(nameof(schema)); _leaveOpen = leaveOpen; + _allocator = allocator ?? MemoryAllocator.Default.Value; Buffers = ArrayPool.Create(); Builder = new FlatBufferBuilder(1024); @@ -301,6 +357,13 @@ public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen, IpcOp _fieldTypeBuilder = new ArrowTypeFlatbufferBuilder(Builder); _options = options ?? 
IpcOptions.Default; + + if (_options.CompressionCodec.HasValue && _options.CompressionCodecFactory == null) + { + throw new ArgumentException( + $"A {nameof(_options.CompressionCodecFactory)} must be provided when a {nameof(_options.CompressionCodec)} is specified", + nameof(options)); + } } private void CreateSelfAndChildrenFieldNodes(ArrayData data) @@ -326,6 +389,23 @@ private static int CountAllNodes(IReadOnlyList fields) return count; } + private Offset GetBodyCompression() + { + if (_options.CompressionCodec == null) + { + return default; + } + + var compressionType = _options.CompressionCodec.Value switch + { + CompressionCodecType.Lz4Frame => Flatbuf.CompressionType.LZ4_FRAME, + CompressionCodecType.Zstd => Flatbuf.CompressionType.ZSTD, + _ => throw new ArgumentOutOfRangeException() + }; + return Flatbuf.BodyCompression.CreateBodyCompression( + Builder, compressionType, Flatbuf.BodyCompressionMethod.BUFFER); + } + private static void CountSelfAndChildrenNodes(IArrowType type, ref int count) { if (type is NestedType nestedType) @@ -356,7 +436,7 @@ private protected void WriteRecordBatchInternal(RecordBatch recordBatch) } (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = - PreparingWritingRecordBatch(recordBatch); + PrepareWritingRecordBatch(recordBatch); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -367,7 +447,7 @@ private protected void WriteRecordBatchInternal(RecordBatch recordBatch) Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, fieldNodesVectorOffset, buffersVectorOffset, - default, + GetBodyCompression(), variadicCountsOffset); long metadataLength = WriteMessage(Flatbuf.MessageHeader.RecordBatch, @@ -397,7 +477,7 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat } (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = - PreparingWritingRecordBatch(recordBatch); + PrepareWritingRecordBatch(recordBatch); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -408,7 +488,7 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, fieldNodesVectorOffset, buffersVectorOffset, - default, + GetBodyCompression(), variadicCountsOffset); long metadataLength = await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch, @@ -482,12 +562,12 @@ private async ValueTask WriteBufferDataAsync(IReadOnlyList PreparingWritingRecordBatch(RecordBatch recordBatch) + private Tuple PrepareWritingRecordBatch(RecordBatch recordBatch) { - return PreparingWritingRecordBatch(recordBatch.Schema.FieldsList, recordBatch.ArrayList); + return PrepareWritingRecordBatch(recordBatch.Schema.FieldsList, recordBatch.ArrayList); } - private Tuple PreparingWritingRecordBatch(IReadOnlyList fields, IReadOnlyList arrays) + private Tuple PrepareWritingRecordBatch(IReadOnlyList fields, IReadOnlyList arrays) { Builder.Clear(); @@ -507,7 +587,13 @@ private Tuple Pre // Serialize buffers - var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder(); + // CompressionCodec can be disposed after all data is visited by the builder, + // and doesn't need to be alive for the full lifetime of the ArrowRecordBatchFlatBufferBuilder + using var compressionCodec = _options.CompressionCodec.HasValue + ? 
_options.CompressionCodecFactory.CreateCodec(_options.CompressionCodec.Value, _options.CompressionLevel) + : null; + + var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder(compressionCodec, _allocator, _compressionStream); for (int i = 0; i < fieldCount; i++) { IArrowArray fieldArray = arrays[i]; @@ -599,7 +685,7 @@ private protected async Task WriteDictionaryAsync(long id, IArrowType valueType, var arrays = new List { dictionary }; (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = - PreparingWritingRecordBatch(fields, arrays); + PrepareWritingRecordBatch(fields, arrays); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -607,7 +693,7 @@ private protected async Task WriteDictionaryAsync(long id, IArrowType valueType, Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, dictionary.Length, fieldNodesVectorOffset, buffersVectorOffset, - default, + GetBodyCompression(), variadicCountsOffset); // TODO: Support delta. @@ -994,6 +1080,7 @@ public virtual void Dispose() { BaseStream.Dispose(); } + _compressionStream.Dispose(); } } diff --git a/csharp/src/Apache.Arrow/Ipc/ICompressionCodec.cs b/csharp/src/Apache.Arrow/Ipc/ICompressionCodec.cs index b18ca3a5e4190..16c01d7130fb5 100644 --- a/csharp/src/Apache.Arrow/Ipc/ICompressionCodec.cs +++ b/csharp/src/Apache.Arrow/Ipc/ICompressionCodec.cs @@ -14,6 +14,7 @@ // limitations under the License. using System; +using System.IO; namespace Apache.Arrow.Ipc { @@ -29,5 +30,19 @@ public interface ICompressionCodec : IDisposable /// Data buffer to write decompressed data to /// The number of decompressed bytes written into the destination int Decompress(ReadOnlyMemory source, Memory destination); + + /// + /// Write compressed data + /// + /// The data to compress + /// The stream to write compressed data to + void Compress(ReadOnlyMemory source, Stream destination) +#if NET6_0_OR_GREATER + { + throw new NotImplementedException("This codec does not support compression"); + } +#else + ; +#endif } } diff --git a/csharp/src/Apache.Arrow/Ipc/ICompressionCodecFactory.cs b/csharp/src/Apache.Arrow/Ipc/ICompressionCodecFactory.cs index 5422a033bd6d2..f367b15574b6e 100644 --- a/csharp/src/Apache.Arrow/Ipc/ICompressionCodecFactory.cs +++ b/csharp/src/Apache.Arrow/Ipc/ICompressionCodecFactory.cs @@ -20,6 +20,27 @@ namespace Apache.Arrow.Ipc /// public interface ICompressionCodecFactory { + /// + /// Create a new compression codec + /// + /// The type of codec to create + /// The created codec ICompressionCodec CreateCodec(CompressionCodecType compressionCodecType); + + /// + /// Create a new compression codec with a specified compression level + /// + /// The type of codec to create + /// The compression level to use when compressing data + /// The created codec + ICompressionCodec CreateCodec(CompressionCodecType compressionCodecType, int? compressionLevel) +#if NET6_0_OR_GREATER + { + // Default implementation ignores the compression level + return CreateCodec(compressionCodecType); + } +#else + ; +#endif } } diff --git a/csharp/src/Apache.Arrow/Ipc/IpcOptions.cs b/csharp/src/Apache.Arrow/Ipc/IpcOptions.cs index b6cc3a1cb4b51..8484c9a04ab2d 100644 --- a/csharp/src/Apache.Arrow/Ipc/IpcOptions.cs +++ b/csharp/src/Apache.Arrow/Ipc/IpcOptions.cs @@ -25,6 +25,23 @@ public class IpcOptions /// public bool WriteLegacyIpcFormat { get; set; } + /// + /// The compression codec to use to compress data buffers. 
+ /// If null (the default value), no compression is used. + /// + public CompressionCodecType? CompressionCodec { get; set; } + + /// + /// The compression codec factory used to create compression codecs. + /// Must be provided if a CompressionCodec is specified. + /// + public ICompressionCodecFactory CompressionCodecFactory { get; set; } + + /// + /// Sets the compression level to use for codecs that support this. + /// + public int? CompressionLevel { get; set; } + public IpcOptions() { } diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index 8ed7a93bdcf27..f5e2a0ef8e16e 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -15,6 +15,7 @@ + diff --git a/csharp/test/Apache.Arrow.Compression.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Compression.Tests/ArrowFileWriterTests.cs new file mode 100644 index 0000000000000..a237f9c1d0660 --- /dev/null +++ b/csharp/test/Apache.Arrow.Compression.Tests/ArrowFileWriterTests.cs @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading.Tasks; +using Apache.Arrow.Ipc; +using Apache.Arrow.Tests; +using K4os.Compression.LZ4; +using Xunit; + +namespace Apache.Arrow.Compression.Tests +{ + public class ArrowFileWriterTests + { + [Fact] + public void ThrowsWhenNoCompressionFactoryProvided() + { + var batch = TestData.CreateSampleRecordBatch(length: 100); + var options = new IpcOptions + { + CompressionCodec = CompressionCodecType.Zstd, + }; + + using var stream = new MemoryStream(); + var exception = Assert.Throws( + () => new ArrowFileWriter(stream, batch.Schema, leaveOpen: true, options)); + + Assert.Contains("A CompressionCodecFactory must be provided", exception.Message); + } + + [Theory] + [InlineData(CompressionCodecType.Zstd, null)] + [InlineData(CompressionCodecType.Zstd, 2)] + [InlineData(CompressionCodecType.Lz4Frame, null)] + [InlineData(CompressionCodecType.Lz4Frame, (int)LZ4Level.L03_HC)] + public void CanWriteCompressedIpcFile(CompressionCodecType codec, int? 
compressionLevel) + { + var batch = TestData.CreateSampleRecordBatch(length: 100); + var codecFactory = new CompressionCodecFactory(); + var options = new IpcOptions + { + CompressionCodecFactory = codecFactory, + CompressionCodec = codec, + CompressionLevel = compressionLevel, + }; + TestRoundTripRecordBatches(new [] {batch}, options, codecFactory); + } + + [Theory] + [InlineData(CompressionCodecType.Zstd)] + [InlineData(CompressionCodecType.Lz4Frame)] + public async Task CanWriteCompressedIpcFileAsync(CompressionCodecType codec) + { + var batch = TestData.CreateSampleRecordBatch(length: 100); + var codecFactory = new CompressionCodecFactory(); + var options = new IpcOptions + { + CompressionCodecFactory = codecFactory, + CompressionCodec = codec, + }; + await TestRoundTripRecordBatchesAsync(new [] {batch}, options, codecFactory); + } + + private static void TestRoundTripRecordBatches( + IReadOnlyList originalBatches, IpcOptions options, ICompressionCodecFactory codecFactory) + { + using var stream = new MemoryStream(); + + using (var writer = new ArrowFileWriter(stream, originalBatches[0].Schema, leaveOpen: true, options)) + { + foreach (var originalBatch in originalBatches) + { + writer.WriteRecordBatch(originalBatch); + } + writer.WriteEnd(); + } + + // Should throw if trying to read without an ICompressionCodecFactory + stream.Position = 0; + var exception = Assert.Throws(() => + { + using var reader = new ArrowFileReader(stream, leaveOpen: true); + reader.ReadNextRecordBatch(); + }); + Assert.Contains(nameof(ICompressionCodecFactory), exception.Message); + + stream.Position = 0; + using (var reader = new ArrowFileReader(stream, codecFactory)) + { + foreach (var originalBatch in originalBatches) + { + var newBatch = reader.ReadNextRecordBatch(); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + } + } + } + + private static async Task TestRoundTripRecordBatchesAsync( + IReadOnlyList originalBatches, IpcOptions options, ICompressionCodecFactory codecFactory) + { + using var stream = new MemoryStream(); + + using (var writer = new ArrowFileWriter(stream, originalBatches[0].Schema, leaveOpen: true, options)) + { + foreach (var originalBatch in originalBatches) + { + await writer.WriteRecordBatchAsync(originalBatch); + } + await writer.WriteEndAsync(); + } + + // Should throw if trying to read without an ICompressionCodecFactory + stream.Position = 0; + var exception = await Assert.ThrowsAsync(async () => + { + using var reader = new ArrowFileReader(stream, leaveOpen: true); + await reader.ReadNextRecordBatchAsync(); + }); + Assert.Contains(nameof(ICompressionCodecFactory), exception.Message); + + stream.Position = 0; + using (var reader = new ArrowFileReader(stream, codecFactory)) + { + foreach (var originalBatch in originalBatches) + { + var newBatch = await reader.ReadNextRecordBatchAsync(); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + } + } + } + } +} + diff --git a/csharp/test/Apache.Arrow.Compression.Tests/ArrowStreamWriterTests.cs b/csharp/test/Apache.Arrow.Compression.Tests/ArrowStreamWriterTests.cs new file mode 100644 index 0000000000000..3b09dc26a343f --- /dev/null +++ b/csharp/test/Apache.Arrow.Compression.Tests/ArrowStreamWriterTests.cs @@ -0,0 +1,184 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading.Tasks; +using Apache.Arrow.Ipc; +using Apache.Arrow.Tests; +using K4os.Compression.LZ4; +using Xunit; + +namespace Apache.Arrow.Compression.Tests +{ + public class ArrowStreamWriterTests + { + [Fact] + public void ThrowsWhenNoCompressionFactoryProvided() + { + var batch = TestData.CreateSampleRecordBatch(length: 100); + var options = new IpcOptions + { + CompressionCodec = CompressionCodecType.Zstd, + }; + + using var stream = new MemoryStream(); + var exception = Assert.Throws( + () => new ArrowStreamWriter(stream, batch.Schema, leaveOpen: true, options)); + + Assert.Contains("A CompressionCodecFactory must be provided", exception.Message); + } + + [Theory] + [InlineData(CompressionCodecType.Zstd, null)] + [InlineData(CompressionCodecType.Zstd, 2)] + [InlineData(CompressionCodecType.Lz4Frame, null)] + [InlineData(CompressionCodecType.Lz4Frame, (int)LZ4Level.L03_HC)] + public void CanWriteCompressedIpcStream(CompressionCodecType codec, int? compressionLevel) + { + var batch = TestData.CreateSampleRecordBatch(length: 100); + var codecFactory = new CompressionCodecFactory(); + var options = new IpcOptions + { + CompressionCodecFactory = codecFactory, + CompressionCodec = codec, + CompressionLevel = compressionLevel, + }; + TestRoundTripRecordBatches(new [] {batch}, options, codecFactory); + } + + [Theory] + [InlineData(CompressionCodecType.Zstd)] + [InlineData(CompressionCodecType.Lz4Frame)] + public async Task CanWriteCompressedIpcStreamAsync(CompressionCodecType codec) + { + var batch = TestData.CreateSampleRecordBatch(length: 100); + var codecFactory = new CompressionCodecFactory(); + var options = new IpcOptions + { + CompressionCodecFactory = codecFactory, + CompressionCodec = codec, + }; + await TestRoundTripRecordBatchesAsync(new [] {batch}, options, codecFactory); + } + + [Fact] + public void CanWriteEmptyBatches() + { + var batch = TestData.CreateSampleRecordBatch(length: 0); + var codecFactory = new CompressionCodecFactory(); + var options = new IpcOptions + { + CompressionCodecFactory = codecFactory, + CompressionCodec = CompressionCodecType.Lz4Frame, + }; + TestRoundTripRecordBatches(new [] {batch}, options, codecFactory); + } + + [Theory] + [InlineData(CompressionCodecType.Zstd)] + [InlineData(CompressionCodecType.Lz4Frame)] + public void ThrowsForInvalidCompressionLevel(CompressionCodecType codec) + { + var batch = TestData.CreateSampleRecordBatch(length: 100); + var codecFactory = new CompressionCodecFactory(); + var options = new IpcOptions + { + CompressionCodecFactory = codecFactory, + CompressionCodec = codec, + CompressionLevel = 12345, + }; + + using var stream = new MemoryStream(); + + Assert.Throws(() => + { + using var writer = new ArrowStreamWriter(stream, batch.Schema, leaveOpen: false, options); + writer.WriteRecordBatch(batch); + writer.WriteEnd(); + }); + } + + private static void 
TestRoundTripRecordBatches( + IReadOnlyList originalBatches, IpcOptions options, ICompressionCodecFactory codecFactory) + { + using var stream = new MemoryStream(); + + using (var writer = new ArrowStreamWriter(stream, originalBatches[0].Schema, leaveOpen: true, options)) + { + foreach (var originalBatch in originalBatches) + { + writer.WriteRecordBatch(originalBatch); + } + writer.WriteEnd(); + } + + // Should throw if trying to read without an ICompressionCodecFactory + stream.Position = 0; + var exception = Assert.Throws(() => + { + using var reader = new ArrowStreamReader(stream, leaveOpen: true); + reader.ReadNextRecordBatch(); + }); + Assert.Contains(nameof(ICompressionCodecFactory), exception.Message); + + stream.Position = 0; + using (var reader = new ArrowStreamReader(stream, codecFactory)) + { + foreach (var originalBatch in originalBatches) + { + var newBatch = reader.ReadNextRecordBatch(); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + } + } + } + + private static async Task TestRoundTripRecordBatchesAsync( + IReadOnlyList originalBatches, IpcOptions options, ICompressionCodecFactory codecFactory) + { + using var stream = new MemoryStream(); + + using (var writer = new ArrowStreamWriter(stream, originalBatches[0].Schema, leaveOpen: true, options)) + { + foreach (var originalBatch in originalBatches) + { + await writer.WriteRecordBatchAsync(originalBatch); + } + await writer.WriteEndAsync(); + } + + // Should throw if trying to read without an ICompressionCodecFactory + stream.Position = 0; + var exception = await Assert.ThrowsAsync(async () => + { + using var reader = new ArrowStreamReader(stream, leaveOpen: true); + await reader.ReadNextRecordBatchAsync(); + }); + Assert.Contains(nameof(ICompressionCodecFactory), exception.Message); + + stream.Position = 0; + using (var reader = new ArrowStreamReader(stream, codecFactory)) + { + foreach (var originalBatch in originalBatches) + { + var newBatch = await reader.ReadNextRecordBatchAsync(); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + } + } + } + } +} + diff --git a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj index cb7f7ae896ee2..e77f329bf2a15 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj +++ b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj @@ -10,6 +10,7 @@ + diff --git a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs index 6a1e91240989b..3886846833c27 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs @@ -16,6 +16,7 @@ using System; using System.IO; using System.Threading.Tasks; +using Apache.Arrow.Compression; using Apache.Arrow.Ipc; using Apache.Arrow.Tests; using Apache.Arrow.Types; @@ -65,8 +66,9 @@ private async Task Validate() { JsonFile jsonFile = await ParseJsonFile(); + var compressionFactory = new CompressionCodecFactory(); using FileStream arrowFileStream = ArrowFileInfo.OpenRead(); - using ArrowFileReader reader = new ArrowFileReader(arrowFileStream); + using ArrowFileReader reader = new ArrowFileReader(arrowFileStream, compressionCodecFactory: compressionFactory); int batchCount = await reader.RecordBatchCountAsync(); if (batchCount != jsonFile.Batches.Count) @@ -122,7 +124,8 @@ private async Task JsonToArrow() private async 
Task StreamToFile() { - using ArrowStreamReader reader = new ArrowStreamReader(Console.OpenStandardInput()); + var compressionFactory = new CompressionCodecFactory(); + using ArrowStreamReader reader = new ArrowStreamReader(Console.OpenStandardInput(), compressionCodecFactory: compressionFactory); RecordBatch batch = await reader.ReadNextRecordBatchAsync(); @@ -145,7 +148,8 @@ private async Task StreamToFile() private async Task FileToStream() { using FileStream fileStream = ArrowFileInfo.OpenRead(); - using ArrowFileReader fileReader = new ArrowFileReader(fileStream); + var compressionFactory = new CompressionCodecFactory(); + using ArrowFileReader fileReader = new ArrowFileReader(fileStream, compressionCodecFactory: compressionFactory); // read the record batch count to initialize the Schema await fileReader.RecordBatchCountAsync(); diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index 7fadb7e47cf93..299983f62f283 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -158,7 +158,6 @@ def _gold_tests(self, gold_dir): skip_testers.add("JS") skip_testers.add("Rust") if prefix == '2.0.0-compression': - skip_testers.add("C#") skip_testers.add("JS") # See https://github.com/apache/arrow/pull/9822 for how to diff --git a/docs/source/status.rst b/docs/source/status.rst index a0375585dbee2..4bff37c8527fa 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -145,7 +145,7 @@ IPC Format +-----------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Sparse tensors | ✓ | | | | | | | | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Buffer compression | ✓ | ✓ (3) | ✓ | | ✓ (4) | ✓ | ✓ | | +| Buffer compression | ✓ | ✓ (3) | ✓ | | ✓ | ✓ | ✓ | | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Endianness conversion | ✓ (2) | | ✓ (2) | | | | | | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -160,8 +160,6 @@ Notes: * \(3) LZ4 Codec currently is quite inefficient. ARROW-11901 tracks improving performance. -* \(4) Compression when writing is not supported, only decompression when reading. - .. seealso:: The :ref:`format-ipc` specification. From 478755f0ef79a921aaa14822c8829c122bbbe92e Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 7 Feb 2024 09:45:36 -0400 Subject: [PATCH 57/74] GH-38717: [C++] Add ImportChunkedArray and ExportChunkedArray to/from ArrowArrayStream (#39455) ### Rationale for this change The `ChunkedArray` has no equivalent in the C data interface; however, it is the primary array structure that higher level bindings interact with (because it is a column in a `Table`). In the Python capsule interface, this means that ChunkedArrays always require a workaround involving loops in Python. ### What changes are included in this PR? - Added `ImportChunkedArray()` and `ExportChunkedArray()` - Generalized the classes that support import/export to relax the assumption that every `ArrowArray` in an `ArrowArrayStream` is a `RecordBatch`. ### Are these changes tested? TODO ### Are there any user-facing changes? Yes, two new functions are added to bridge.h. 
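As a rough usage sketch (not part of this diff), the two new functions round-trip a `ChunkedArray` through a single `ArrowArrayStream`; the `RoundTrip` wrapper below is hypothetical, but the calls match the declarations added to `bridge.h`:

```cpp
#include "arrow/c/abi.h"
#include "arrow/c/bridge.h"
#include "arrow/chunked_array.h"
#include "arrow/result.h"

// Hypothetical helper: export a ChunkedArray as a C stream and import it back.
// A real consumer would hand the ArrowArrayStream to another library instead.
arrow::Result<std::shared_ptr<arrow::ChunkedArray>> RoundTrip(
    const std::shared_ptr<arrow::ChunkedArray>& chunked) {
  struct ArrowArrayStream c_stream;
  // Each chunk becomes one ArrowArray in the stream; the stream schema is the chunk type.
  ARROW_RETURN_NOT_OK(arrow::ExportChunkedArray(chunked, &c_stream));
  // ImportChunkedArray consumes and releases the stream, reassembling the chunks.
  return arrow::ImportChunkedArray(&c_stream);
}
```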
* Closes: #38717 Lead-authored-by: Dewey Dunnington Co-authored-by: Dewey Dunnington Co-authored-by: Antoine Pitrou Signed-off-by: Dewey Dunnington --- cpp/src/arrow/c/bridge.cc | 252 +++++++++++++++++++++++++-------- cpp/src/arrow/c/bridge.h | 22 +++ cpp/src/arrow/c/bridge_test.cc | 115 +++++++++++++++ 3 files changed, 328 insertions(+), 61 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 119249da99a6d..022fce72f59b8 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -2002,13 +2002,49 @@ Result> ImportDeviceRecordBatch( namespace { +Status ExportStreamSchema(const std::shared_ptr& src, + struct ArrowSchema* out_schema) { + return ExportSchema(*src->schema(), out_schema); +} + +Status ExportStreamSchema(const std::shared_ptr& src, + struct ArrowSchema* out_schema) { + return ExportType(*src->type(), out_schema); +} + +Status ExportStreamNext(const std::shared_ptr& src, int64_t i, + struct ArrowArray* out_array) { + std::shared_ptr batch; + RETURN_NOT_OK(src->ReadNext(&batch)); + if (batch == nullptr) { + // End of stream + ArrowArrayMarkReleased(out_array); + return Status::OK(); + } else { + return ExportRecordBatch(*batch, out_array); + } +} + +Status ExportStreamNext(const std::shared_ptr& src, int64_t i, + struct ArrowArray* out_array) { + if (i >= src->num_chunks()) { + // End of stream + ArrowArrayMarkReleased(out_array); + return Status::OK(); + } else { + return ExportArray(*src->chunk(static_cast(i)), out_array); + } +} + +template class ExportedArrayStream { public: struct PrivateData { - explicit PrivateData(std::shared_ptr reader) - : reader_(std::move(reader)) {} + explicit PrivateData(std::shared_ptr reader) + : reader_(std::move(reader)), batch_num_(0) {} - std::shared_ptr reader_; + std::shared_ptr reader_; + int64_t batch_num_; std::string last_error_; PrivateData() = default; @@ -2018,19 +2054,11 @@ class ExportedArrayStream { explicit ExportedArrayStream(struct ArrowArrayStream* stream) : stream_(stream) {} Status GetSchema(struct ArrowSchema* out_schema) { - return ExportSchema(*reader()->schema(), out_schema); + return ExportStreamSchema(reader(), out_schema); } Status GetNext(struct ArrowArray* out_array) { - std::shared_ptr batch; - RETURN_NOT_OK(reader()->ReadNext(&batch)); - if (batch == nullptr) { - // End of stream - ArrowArrayMarkReleased(out_array); - return Status::OK(); - } else { - return ExportRecordBatch(*batch, out_array); - } + return ExportStreamNext(reader(), next_batch_num(), out_array); } const char* GetLastError() { @@ -2070,6 +2098,15 @@ class ExportedArrayStream { return ExportedArrayStream{stream}.GetLastError(); } + static Status Make(std::shared_ptr reader, struct ArrowArrayStream* out) { + out->get_schema = ExportedArrayStream::StaticGetSchema; + out->get_next = ExportedArrayStream::StaticGetNext; + out->get_last_error = ExportedArrayStream::StaticGetLastError; + out->release = ExportedArrayStream::StaticRelease; + out->private_data = new ExportedArrayStream::PrivateData{std::move(reader)}; + return Status::OK(); + } + private: int ToCError(const Status& status) { if (ARROW_PREDICT_TRUE(status.ok())) { @@ -2093,7 +2130,9 @@ class ExportedArrayStream { return reinterpret_cast(stream_->private_data); } - const std::shared_ptr& reader() { return private_data()->reader_; } + const std::shared_ptr& reader() { return private_data()->reader_; } + + int64_t next_batch_num() { return private_data()->batch_num_++; } struct ArrowArrayStream* stream_; }; @@ -2102,12 +2141,12 @@ class 
ExportedArrayStream { Status ExportRecordBatchReader(std::shared_ptr reader, struct ArrowArrayStream* out) { - out->get_schema = ExportedArrayStream::StaticGetSchema; - out->get_next = ExportedArrayStream::StaticGetNext; - out->get_last_error = ExportedArrayStream::StaticGetLastError; - out->release = ExportedArrayStream::StaticRelease; - out->private_data = new ExportedArrayStream::PrivateData{std::move(reader)}; - return Status::OK(); + return ExportedArrayStream::Make(std::move(reader), out); +} + +Status ExportChunkedArray(std::shared_ptr chunked_array, + struct ArrowArrayStream* out) { + return ExportedArrayStream::Make(std::move(chunked_array), out); } ////////////////////////////////////////////////////////////////////////// @@ -2115,66 +2154,58 @@ Status ExportRecordBatchReader(std::shared_ptr reader, namespace { -class ArrayStreamBatchReader : public RecordBatchReader { +class ArrayStreamReader { public: - explicit ArrayStreamBatchReader(std::shared_ptr schema, - struct ArrowArrayStream* stream) - : schema_(std::move(schema)) { + explicit ArrayStreamReader(struct ArrowArrayStream* stream) { ArrowArrayStreamMove(stream, &stream_); DCHECK(!ArrowArrayStreamIsReleased(&stream_)); } - ~ArrayStreamBatchReader() override { + ~ArrayStreamReader() { ReleaseStream(); } + + void ReleaseStream() { if (!ArrowArrayStreamIsReleased(&stream_)) { ArrowArrayStreamRelease(&stream_); } DCHECK(ArrowArrayStreamIsReleased(&stream_)); } - std::shared_ptr schema() const override { return schema_; } - - Status ReadNext(std::shared_ptr* batch) override { - struct ArrowArray c_array; - if (ArrowArrayStreamIsReleased(&stream_)) { - return Status::Invalid( - "Attempt to read from a reader that has already been closed"); - } - RETURN_NOT_OK(StatusFromCError(stream_.get_next(&stream_, &c_array))); - if (ArrowArrayIsReleased(&c_array)) { - // End of stream - batch->reset(); - return Status::OK(); - } else { - return ImportRecordBatch(&c_array, schema_).Value(batch); + protected: + Status ReadNextArrayInternal(struct ArrowArray* array) { + ArrowArrayMarkReleased(array); + Status status = StatusFromCError(stream_.get_next(&stream_, array)); + if (!status.ok() && !ArrowArrayIsReleased(array)) { + ArrowArrayRelease(array); } + + return status; } - Status Close() override { - if (!ArrowArrayStreamIsReleased(&stream_)) { - ArrowArrayStreamRelease(&stream_); - } - return Status::OK(); + Result> ReadSchema() { + struct ArrowSchema c_schema = {}; + ARROW_RETURN_NOT_OK( + StatusFromCError(&stream_, stream_.get_schema(&stream_, &c_schema))); + ARROW_ASSIGN_OR_RAISE(auto schema, ImportSchema(&c_schema)); + return schema; } - static Result> Make( - struct ArrowArrayStream* stream) { - if (ArrowArrayStreamIsReleased(stream)) { - return Status::Invalid("Cannot import released ArrowArrayStream"); - } - std::shared_ptr schema; + Result> ReadField() { struct ArrowSchema c_schema = {}; - auto status = StatusFromCError(stream, stream->get_schema(stream, &c_schema)); - if (status.ok()) { - status = ImportSchema(&c_schema).Value(&schema); - } - if (!status.ok()) { - ArrowArrayStreamRelease(stream); - return status; + ARROW_RETURN_NOT_OK( + StatusFromCError(&stream_, stream_.get_schema(&stream_, &c_schema))); + ARROW_ASSIGN_OR_RAISE(auto schema, ImportField(&c_schema)); + return schema; + } + + Status CheckNotReleased() { + if (ArrowArrayStreamIsReleased(&stream_)) { + return Status::Invalid( + "Attempt to read from a stream that has already been closed"); + } else { + return Status::OK(); } - return std::make_shared(std::move(schema), 
stream); } - private: Status StatusFromCError(int errno_like) const { return StatusFromCError(&stream_, errno_like); } @@ -2203,15 +2234,114 @@ class ArrayStreamBatchReader : public RecordBatchReader { return {code, last_error ? std::string(last_error) : ""}; } + private: mutable struct ArrowArrayStream stream_; +}; + +class ArrayStreamBatchReader : public RecordBatchReader, public ArrayStreamReader { + public: + explicit ArrayStreamBatchReader(struct ArrowArrayStream* stream) + : ArrayStreamReader(stream) {} + + Status Init() { + ARROW_ASSIGN_OR_RAISE(schema_, ReadSchema()); + return Status::OK(); + } + + std::shared_ptr schema() const override { return schema_; } + + Status ReadNext(std::shared_ptr* batch) override { + ARROW_RETURN_NOT_OK(CheckNotReleased()); + + struct ArrowArray c_array; + ARROW_RETURN_NOT_OK(ReadNextArrayInternal(&c_array)); + + if (ArrowArrayIsReleased(&c_array)) { + // End of stream + batch->reset(); + return Status::OK(); + } else { + return ImportRecordBatch(&c_array, schema_).Value(batch); + } + } + + Status Close() override { + ReleaseStream(); + return Status::OK(); + } + + private: std::shared_ptr schema_; }; +class ArrayStreamArrayReader : public ArrayStreamReader { + public: + explicit ArrayStreamArrayReader(struct ArrowArrayStream* stream) + : ArrayStreamReader(stream) {} + + Status Init() { + ARROW_ASSIGN_OR_RAISE(field_, ReadField()); + return Status::OK(); + } + + std::shared_ptr data_type() const { return field_->type(); } + + Status ReadNext(std::shared_ptr* array) { + ARROW_RETURN_NOT_OK(CheckNotReleased()); + + struct ArrowArray c_array; + ARROW_RETURN_NOT_OK(ReadNextArrayInternal(&c_array)); + + if (ArrowArrayIsReleased(&c_array)) { + // End of stream + array->reset(); + return Status::OK(); + } else { + return ImportArray(&c_array, field_->type()).Value(array); + } + } + + private: + std::shared_ptr field_; +}; + } // namespace Result> ImportRecordBatchReader( struct ArrowArrayStream* stream) { - return ArrayStreamBatchReader::Make(stream); + if (ArrowArrayStreamIsReleased(stream)) { + return Status::Invalid("Cannot import released ArrowArrayStream"); + } + + auto reader = std::make_shared(stream); + ARROW_RETURN_NOT_OK(reader->Init()); + return reader; +} + +Result> ImportChunkedArray( + struct ArrowArrayStream* stream) { + if (ArrowArrayStreamIsReleased(stream)) { + return Status::Invalid("Cannot import released ArrowArrayStream"); + } + + auto reader = std::make_shared(stream); + ARROW_RETURN_NOT_OK(reader->Init()); + + std::shared_ptr data_type = reader->data_type(); + + ArrayVector chunks; + std::shared_ptr chunk; + while (true) { + ARROW_RETURN_NOT_OK(reader->ReadNext(&chunk)); + if (!chunk) { + break; + } + + chunks.push_back(std::move(chunk)); + } + + reader->ReleaseStream(); + return ChunkedArray::Make(std::move(chunks), std::move(data_type)); } } // namespace arrow diff --git a/cpp/src/arrow/c/bridge.h b/cpp/src/arrow/c/bridge.h index 45583109a761f..e98a42818f628 100644 --- a/cpp/src/arrow/c/bridge.h +++ b/cpp/src/arrow/c/bridge.h @@ -302,6 +302,17 @@ ARROW_EXPORT Status ExportRecordBatchReader(std::shared_ptr reader, struct ArrowArrayStream* out); +/// \brief Export C++ ChunkedArray using the C data interface format. +/// +/// The resulting ArrowArrayStream struct keeps the chunked array data and buffers alive +/// until its release callback is called by the consumer. 
+/// +/// \param[in] chunked_array ChunkedArray object to export +/// \param[out] out C struct where to export the stream +ARROW_EXPORT +Status ExportChunkedArray(std::shared_ptr chunked_array, + struct ArrowArrayStream* out); + /// \brief Import C++ RecordBatchReader from the C stream interface. /// /// The ArrowArrayStream struct has its contents moved to a private object @@ -313,6 +324,17 @@ ARROW_EXPORT Result> ImportRecordBatchReader( struct ArrowArrayStream* stream); +/// \brief Import C++ ChunkedArray from the C stream interface +/// +/// The ArrowArrayStream struct has its contents moved to a private object, +/// is consumed in its entirity, and released before returning all chunks +/// as a ChunkedArray. +/// +/// \param[in,out] stream C stream interface struct +/// \return Imported ChunkedArray object +ARROW_EXPORT +Result> ImportChunkedArray(struct ArrowArrayStream* stream); + /// @} } // namespace arrow diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index b8d5e0fcd3845..dba6e4736b673 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -4400,6 +4400,17 @@ class TestArrayStreamExport : public BaseArrayStreamTest { ASSERT_OK_AND_ASSIGN(auto batch, ImportRecordBatch(&c_array, expected.schema())); AssertBatchesEqual(expected, *batch); } + + void AssertStreamNext(struct ArrowArrayStream* c_stream, const Array& expected) { + struct ArrowArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + ArrayExportGuard guard(&c_array); + ASSERT_FALSE(ArrowArrayIsReleased(&c_array)); + + ASSERT_OK_AND_ASSIGN(auto array, ImportArray(&c_array, expected.type())); + AssertArraysEqual(expected, *array); + } }; TEST_F(TestArrayStreamExport, Empty) { @@ -4495,6 +4506,67 @@ TEST_F(TestArrayStreamExport, Errors) { ASSERT_EQ(EINVAL, c_stream.get_next(&c_stream, &c_array)); } +TEST_F(TestArrayStreamExport, ChunkedArrayExportEmpty) { + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({}, int32())); + + struct ArrowArrayStream c_stream; + struct ArrowSchema c_schema; + + ASSERT_OK(ExportChunkedArray(chunked_array, &c_stream)); + ArrayStreamExportGuard guard(&c_stream); + + { + ArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowArrayStreamIsReleased(&c_stream)); + + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + AssertStreamEnd(&c_stream); + } + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_type, ImportType(&c_schema)); + AssertTypeEqual(*chunked_array->type(), *got_type); + } +} + +TEST_F(TestArrayStreamExport, ChunkedArrayExport) { + ASSERT_OK_AND_ASSIGN(auto chunked_array, + ChunkedArray::Make({ArrayFromJSON(int32(), "[1, 2]"), + ArrayFromJSON(int32(), "[4, 5, null]")})); + + struct ArrowArrayStream c_stream; + struct ArrowSchema c_schema; + struct ArrowArray c_array0, c_array1; + + ASSERT_OK(ExportChunkedArray(chunked_array, &c_stream)); + ArrayStreamExportGuard guard(&c_stream); + + { + ArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowArrayStreamIsReleased(&c_stream)); + + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array0)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array1)); + AssertStreamEnd(&c_stream); + } + + ArrayExportGuard guard0(&c_array0), guard1(&c_array1); + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_type, ImportType(&c_schema)); + AssertTypeEqual(*chunked_array->type(), *got_type); + } + + ASSERT_GT(pool_->bytes_allocated(), 
orig_allocated_); + ASSERT_OK_AND_ASSIGN(auto array, ImportArray(&c_array0, chunked_array->type())); + AssertArraysEqual(*chunked_array->chunk(0), *array); + ASSERT_OK_AND_ASSIGN(array, ImportArray(&c_array1, chunked_array->type())); + AssertArraysEqual(*chunked_array->chunk(1), *array); +} + //////////////////////////////////////////////////////////////////////////// // Array stream roundtrip tests @@ -4534,6 +4606,29 @@ class TestArrayStreamRoundtrip : public BaseArrayStreamTest { ASSERT_TRUE(weak_reader.expired()); } + void Roundtrip(std::shared_ptr src, + std::function&)> check_func) { + ArrowArrayStream c_stream; + + // One original copy which to compare the result, one copy held by the stream + std::weak_ptr weak_src(src); + int64_t initial_use_count = weak_src.use_count(); + + ASSERT_OK(ExportChunkedArray(std::move(src), &c_stream)); + ASSERT_FALSE(ArrowArrayStreamIsReleased(&c_stream)); + + { + ASSERT_OK_AND_ASSIGN(auto dst, ImportChunkedArray(&c_stream)); + // Stream was moved, consumed, and released + ASSERT_TRUE(ArrowArrayStreamIsReleased(&c_stream)); + + // Stream was released by ImportChunkedArray but original copy remains + ASSERT_EQ(weak_src.use_count(), initial_use_count - 1); + + check_func(dst); + } + } + void AssertReaderNext(const std::shared_ptr& reader, const RecordBatch& expected) { ASSERT_OK_AND_ASSIGN(auto batch, reader->Next()); @@ -4631,4 +4726,24 @@ TEST_F(TestArrayStreamRoundtrip, SchemaError) { ASSERT_TRUE(state.released); } +TEST_F(TestArrayStreamRoundtrip, ChunkedArrayRoundtrip) { + ASSERT_OK_AND_ASSIGN(auto src, + ChunkedArray::Make({ArrayFromJSON(int32(), "[1, 2]"), + ArrayFromJSON(int32(), "[4, 5, null]")})); + + Roundtrip(src, [&](const std::shared_ptr& dst) { + AssertTypeEqual(*dst->type(), *src->type()); + AssertChunkedEqual(*dst, *src); + }); +} + +TEST_F(TestArrayStreamRoundtrip, ChunkedArrayRoundtripEmpty) { + ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({}, int32())); + + Roundtrip(src, [&](const std::shared_ptr& dst) { + AssertTypeEqual(*dst->type(), *src->type()); + AssertChunkedEqual(*dst, *src); + }); +} + } // namespace arrow From 7e2fe4fe7634c359017213b79255c9040786fc06 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Feb 2024 15:21:37 +0100 Subject: [PATCH 58/74] GH-39852: [Python] Support creating Binary/StringView arrays from python objects (#39853) Next step for Binary/StringView support in Python (https://github.com/apache/arrow/issues/39633), now adding it to the python->arrow conversion code path. 
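As a quick illustration (mirroring the tests added below), the view types can now be requested directly when converting Python objects; before this change the converter for view types was not implemented and such calls raised:

```python
import pyarrow as pa

# String views are built straight from Python strings now.
arr = pa.array(["foo", "bar", None, "mañana"], type=pa.string_view())
assert arr.type == pa.string_view()
assert arr.null_count == 1

# Bytes-like objects convert to binary views the same way.
bin_arr = pa.array([b"foo", bytearray(b"bar"), None], type=pa.binary_view())
assert bin_arr.type == pa.binary_view()
```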
* Closes: #39852 Authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- .../src/arrow/python/python_to_arrow.cc | 35 ++++++++++++------- python/pyarrow/tests/test_convert_builtin.py | 19 ++++++++-- python/pyarrow/tests/test_scalars.py | 28 +++------------ 3 files changed, 42 insertions(+), 40 deletions(-) diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index d1d94ac17a13e..3c4d59d6594a2 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -486,6 +486,10 @@ class PyValue { return view.ParseString(obj); } + static Status Convert(const BinaryViewType*, const O&, I obj, PyBytesView& view) { + return view.ParseString(obj); + } + static Status Convert(const FixedSizeBinaryType* type, const O&, I obj, PyBytesView& view) { ARROW_RETURN_NOT_OK(view.ParseString(obj)); @@ -499,8 +503,8 @@ class PyValue { } template - static enable_if_string Convert(const T*, const O& options, I obj, - PyBytesView& view) { + static enable_if_t::value || is_string_view_type::value, Status> + Convert(const T*, const O& options, I obj, PyBytesView& view) { if (options.strict) { // Strict conversion, force output to be unicode / utf8 and validate that // any binary values are utf8 @@ -570,18 +574,12 @@ struct PyConverterTrait; template struct PyConverterTrait< - T, - enable_if_t<(!is_nested_type::value && !is_interval_type::value && - !is_extension_type::value && !is_binary_view_like_type::value) || - std::is_same::value>> { + T, enable_if_t<(!is_nested_type::value && !is_interval_type::value && + !is_extension_type::value) || + std::is_same::value>> { using type = PyPrimitiveConverter; }; -template -struct PyConverterTrait> { - // not implemented -}; - template struct PyConverterTrait> { using type = PyListConverter; @@ -699,11 +697,22 @@ class PyPrimitiveConverter:: PyBytesView view_; }; +template +struct OffsetTypeTrait { + using type = typename T::offset_type; +}; + +template +struct OffsetTypeTrait> { + using type = int64_t; +}; + template -class PyPrimitiveConverter> +class PyPrimitiveConverter< + T, enable_if_t::value || is_binary_view_like_type::value>> : public PrimitiveConverter { public: - using OffsetType = typename T::offset_type; + using OffsetType = typename OffsetTypeTrait::type; Status Append(PyObject* value) override { if (PyValue::IsNull(this->options_, value)) { diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 49c4f1a6e79d6..55ea28f50fbb3 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -763,6 +763,16 @@ def test_sequence_unicode(): assert arr.to_pylist() == data +@pytest.mark.parametrize("ty", [pa.string(), pa.large_string(), pa.string_view()]) +def test_sequence_unicode_explicit_type(ty): + data = ['foo', 'bar', None, 'mañana'] + arr = pa.array(data, type=ty) + assert len(arr) == 4 + assert arr.null_count == 1 + assert arr.type == ty + assert arr.to_pylist() == data + + def check_array_mixed_unicode_bytes(binary_type, string_type): values = ['qux', b'foo', bytearray(b'barz')] b_values = [b'qux', b'foo', b'barz'] @@ -787,6 +797,7 @@ def check_array_mixed_unicode_bytes(binary_type, string_type): def test_array_mixed_unicode_bytes(): check_array_mixed_unicode_bytes(pa.binary(), pa.string()) check_array_mixed_unicode_bytes(pa.large_binary(), pa.large_string()) + check_array_mixed_unicode_bytes(pa.binary_view(), 
pa.string_view()) @pytest.mark.large_memory @@ -818,7 +829,7 @@ def test_large_binary_value(ty): @pytest.mark.large_memory -@pytest.mark.parametrize("ty", [pa.binary(), pa.string()]) +@pytest.mark.parametrize("ty", [pa.binary(), pa.string(), pa.string_view()]) def test_string_too_large(ty): # Construct a binary array with a single value larger than 4GB s = b"0123456789abcdefghijklmnopqrstuvwxyz" @@ -836,7 +847,7 @@ def test_sequence_bytes(): u1.decode('utf-8'), # unicode gets encoded, bytearray(b'bar'), None] - for ty in [None, pa.binary(), pa.large_binary()]: + for ty in [None, pa.binary(), pa.large_binary(), pa.binary_view()]: arr = pa.array(data, type=ty) assert len(arr) == 6 assert arr.null_count == 1 @@ -844,7 +855,7 @@ def test_sequence_bytes(): assert arr.to_pylist() == [b'foo', b'dada', b'data', u1, b'bar', None] -@pytest.mark.parametrize("ty", [pa.string(), pa.large_string()]) +@pytest.mark.parametrize("ty", [pa.string(), pa.large_string(), pa.string_view()]) def test_sequence_utf8_to_unicode(ty): # ARROW-1225 data = [b'foo', None, b'bar'] @@ -2431,6 +2442,8 @@ def test_array_from_pylist_offset_overflow(): pa.binary(3)), ([b"a"], [pa.scalar("a", type=pa.large_binary())], pa.large_binary()), (["a"], [pa.scalar("a", type=pa.large_string())], pa.large_string()), + ([b"a"], [pa.scalar("a", type=pa.binary_view())], pa.binary_view()), + (["a"], [pa.scalar("a", type=pa.string_view())], pa.string_view()), ( ["a"], [pa.scalar("a", type=pa.dictionary(pa.int64(), pa.string()))], diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 4a239b23d5676..eed5f045be945 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -51,9 +51,8 @@ (b"bytes", None, pa.BinaryScalar), ("largestring", pa.large_string(), pa.LargeStringScalar), (b"largebytes", pa.large_binary(), pa.LargeBinaryScalar), - # TODO(GH-39633) pa.scalar(..) 
requires python->arrow conversion to be implemented - # ("string_view", pa.string_view(), pa.StringViewScalar), - # (b"bytes_view", pa.binary_view(), pa.BinaryViewScalar), + ("string_view", pa.string_view(), pa.StringViewScalar), + (b"bytes_view", pa.binary_view(), pa.BinaryViewScalar), (b"abc", pa.binary(3), pa.FixedSizeBinaryScalar), ([1, 2, 3], None, pa.ListScalar), ([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar), @@ -492,7 +491,7 @@ def test_month_day_nano_interval(): @pytest.mark.parametrize(('ty', 'scalar_typ'), [ (pa.string(), pa.StringScalar), (pa.large_string(), pa.LargeStringScalar), - # (pa.string_view(), pa.StringViewScalar), + (pa.string_view(), pa.StringViewScalar), ]) def test_string(value, ty, scalar_typ): s = pa.scalar(value, type=ty) @@ -507,30 +506,11 @@ def test_string(value, ty, scalar_typ): assert buf.to_pybytes() == value.encode() -@pytest.mark.parametrize('value', ['foo', 'mañana']) -def test_string_view(value): - # TODO: replace with normal scalar construction - builder = pa.lib.StringViewBuilder() - builder.append(value) - arr = builder.finish() - - s = arr[0] - assert isinstance(s, pa.StringViewScalar) - assert s.as_py() == value - assert s.as_py() != 'something' - assert repr(value) in repr(s) - assert str(s) == str(value) - - buf = s.as_buffer() - assert isinstance(buf, pa.Buffer) - assert buf.to_pybytes() == value.encode() - - @pytest.mark.parametrize('value', [b'foo', b'bar']) @pytest.mark.parametrize(('ty', 'scalar_typ'), [ (pa.binary(), pa.BinaryScalar), (pa.large_binary(), pa.LargeBinaryScalar), - # (pa.binary_view(), pa.BinaryViewScalar), + (pa.binary_view(), pa.BinaryViewScalar), ]) def test_binary(value, ty, scalar_typ): s = pa.scalar(value, type=ty) From 8ffc2140af3c994240b62f3c6412b8dbc889f604 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 7 Feb 2024 10:29:46 -0400 Subject: [PATCH 59/74] GH-39933: [R] Fix pointer conversion to Python for latest reticulate (#39969) ### Rationale for this change The integration tests and documentation build is failing ### What changes are included in this PR? Instead of relying on how reticulate converts an R external pointer, use a Python integer instead. We can't use an R integer (because they're only 32 bits); we can't use an R double (because the static cast to/from uintptr_t is a bit iffy); however, we can use Python to convert a string to Python integer. This is probably how I should have written it the first time but it didn't occur to me at the time. ### Are these changes tested? Yes, covered by existing tests. ### Are there any user-facing changes? No * Closes: #39933 Lead-authored-by: Dewey Dunnington Co-authored-by: Dewey Dunnington Signed-off-by: Dewey Dunnington --- r/R/python.R | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/r/R/python.R b/r/R/python.R index 023d914f16a9e..1159806bf7c25 100644 --- a/r/R/python.R +++ b/r/R/python.R @@ -339,15 +339,9 @@ install_pyarrow <- function(envname = NULL, nightly = FALSE, ...) 
{ } pyarrow_compatible_pointer <- function(ptr) { - pa <- reticulate::import("pyarrow") - version_string <- pa$`__version__` - # remove trailing .devXXX because it won't work with package_version() - pyarrow_version <- package_version(gsub("\\.dev.*?$", "", version_string)) - - # pyarrow pointers changed in version 7.0.0 - if (pyarrow_version >= "7.0.0") { - return(ptr) - } else { - return(external_pointer_addr_double(ptr)) - } + # GH-39933: Workaround because there is no built-in way to send a + # 64-bit integer to Python from an R object + py <- reticulate::import_builtins(convert = FALSE) + addr <- external_pointer_addr_character(ptr) + py$int(addr) } From e83295b1aafbea985f0be61983b0b4fc9094854c Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 7 Feb 2024 11:02:23 -0500 Subject: [PATCH 60/74] MINOR: [Documentation] Fix LargeListView format string in example (#39974) ### Rationale for this change https://github.com/apache/arrow/pull/38899 fixed the format string for LargeListView in the table but left the incorrect format string in the example below. This fixes that. ### What changes are included in this PR? Documentation change ### Are these changes tested? N/A --- docs/source/format/CDataInterface.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/format/CDataInterface.rst b/docs/source/format/CDataInterface.rst index 812212f536169..ef4bf1cf3238d 100644 --- a/docs/source/format/CDataInterface.rst +++ b/docs/source/format/CDataInterface.rst @@ -251,7 +251,7 @@ Examples array has format string ``d:12,5``. * A ``list`` array has format string ``+l``, and its single child has format string ``L``. -* A ``large_list_view`` array has format string ``+Lv``, and its single +* A ``large_list_view`` array has format string ``+vL``, and its single child has format string ``L``. * A ``struct`` has format string ``+s``; its two children have names ``ints`` and ``floats``, and format strings ``i`` and From f609bb171a8bce973d7b040d8684b04a60e806ed Mon Sep 17 00:00:00 2001 From: abandy Date: Wed, 7 Feb 2024 16:01:55 -0500 Subject: [PATCH 61/74] GH-39910: [Go] Add func to load prepared statement from ActionCreatePreparedStatementResult (#39913) Currently, in order to create a PreparedStatement a DoAction call will always be made via the client. I need to be able to make a PreparedStatement from persisted data that will not trigger the DoAction call to the server. 
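For illustration, a minimal sketch of the intended usage, assuming an already-connected `flightsql.Client` and handle/schema bytes persisted earlier. The helper name and the module-path version suffix are placeholders, not part of this change; the `CreatePreparedStatementResult` fields and `LoadPreparedStatementFromResult` call are the ones added in this PR.

```go
package example

// Module path shown for illustration; the major-version suffix (v15/v16/...)
// depends on the Arrow release in use.
import (
	"github.com/apache/arrow/go/v16/arrow/flight/flightsql"
)

// loadPersistedStatement is a hypothetical helper: it rebuilds a prepared
// statement from bytes persisted earlier (the statement handle plus optional
// IPC-serialized dataset/parameter schemas) without triggering the
// CreatePreparedStatement DoAction call on the server.
func loadPersistedStatement(client *flightsql.Client,
	handle, datasetSchema, paramSchema []byte) (*flightsql.PreparedStatement, error) {
	result := &flightsql.CreatePreparedStatementResult{
		PreparedStatementHandle: handle,
		DatasetSchema:           datasetSchema, // may be nil
		ParameterSchema:         paramSchema,   // may be nil
	}
	return client.LoadPreparedStatementFromResult(result)
}
```
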
* Closes: #39910 Authored-by: Alva Bandy Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/client.go | 65 ++++++++++++++++++++++++ go/arrow/flight/flightsql/client_test.go | 30 +++++++++++ go/arrow/flight/flightsql/types.go | 2 + 3 files changed, 97 insertions(+) diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index 441f88f39f43a..068bfa84c3144 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -450,6 +450,31 @@ func (c *Client) PrepareSubstrait(ctx context.Context, plan SubstraitPlan, opts return parsePreparedStatementResponse(c, c.Alloc, stream) } +func (c *Client) LoadPreparedStatementFromResult(result *CreatePreparedStatementResult) (*PreparedStatement, error) { + var ( + err error + dsSchema, paramSchema *arrow.Schema + ) + if result.DatasetSchema != nil { + dsSchema, err = flight.DeserializeSchema(result.DatasetSchema, c.Alloc) + if err != nil { + return nil, err + } + } + if result.ParameterSchema != nil { + paramSchema, err = flight.DeserializeSchema(result.ParameterSchema, c.Alloc) + if err != nil { + return nil, err + } + } + return &PreparedStatement{ + client: c, + handle: result.PreparedStatementHandle, + datasetSchema: dsSchema, + paramSchema: paramSchema, + }, nil +} + func parsePreparedStatementResponse(c *Client, mem memory.Allocator, results pb.FlightService_DoActionClient) (*PreparedStatement, error) { if err := results.CloseSend(); err != nil { return nil, err @@ -1027,6 +1052,46 @@ func (p *PreparedStatement) Execute(ctx context.Context, opts ...grpc.CallOption return p.client.getFlightInfo(ctx, desc, opts...) } +// ExecutePut calls DoPut for the prepared statement on the server. If SetParameters +// has been called then the parameter bindings will be sent before execution. +// +// Will error if already closed. +func (p *PreparedStatement) ExecutePut(ctx context.Context, opts ...grpc.CallOption) error { + if p.closed { + return errors.New("arrow/flightsql: prepared statement already closed") + } + + cmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: p.handle} + + desc, err := descForCommand(cmd) + if err != nil { + return err + } + + if p.hasBindParameters() { + pstream, err := p.client.Client.DoPut(ctx, opts...) + if err != nil { + return err + } + + wr, err := p.writeBindParameters(pstream, desc) + if err != nil { + return err + } + if err = wr.Close(); err != nil { + return err + } + pstream.CloseSend() + + // wait for the server to ack the result + if _, err = pstream.Recv(); err != nil && err != io.EOF { + return err + } + } + + return nil +} + // ExecutePoll executes the prepared statement on the server and returns a PollInfo // indicating the progress of execution. 
// diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index c8b9f7f1246c1..f35aeefcf4628 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -665,6 +665,36 @@ func (s *FlightSqlClientSuite) TestRenewFlightEndpoint() { s.Equal(&mockedRenewedEndpoint, renewedEndpoint) } +func (s *FlightSqlClientSuite) TestPreparedStatementLoadFromResult() { + const query = "query" + + result := &pb.ActionCreatePreparedStatementResult{ + PreparedStatementHandle: []byte(query), + } + + parameterSchemaResult := arrow.NewSchema([]arrow.Field{{Name: "p_id", Type: arrow.PrimitiveTypes.Int64, Nullable: true}}, nil) + result.ParameterSchema = flight.SerializeSchema(parameterSchemaResult, memory.DefaultAllocator) + datasetSchemaResult := arrow.NewSchema([]arrow.Field{{Name: "ds_id", Type: arrow.PrimitiveTypes.Int64, Nullable: true}}, nil) + result.DatasetSchema = flight.SerializeSchema(datasetSchemaResult, memory.DefaultAllocator) + + prepared, err := s.sqlClient.LoadPreparedStatementFromResult(result) + s.NoError(err) + + s.Equal(string(prepared.Handle()), "query") + + paramSchema := prepared.ParameterSchema() + paramRec, _, err := array.RecordFromJSON(memory.DefaultAllocator, paramSchema, strings.NewReader(`[{"p_id": 1}]`)) + s.NoError(err) + defer paramRec.Release() + + datasetSchema := prepared.DatasetSchema() + datasetRec, _, err := array.RecordFromJSON(memory.DefaultAllocator, datasetSchema, strings.NewReader(`[{"ds_id": 1}]`)) + s.NoError(err) + defer datasetRec.Release() + + s.Equal(string(prepared.Handle()), "query") +} + func TestFlightSqlClient(t *testing.T) { suite.Run(t, new(FlightSqlClientSuite)) } diff --git a/go/arrow/flight/flightsql/types.go b/go/arrow/flight/flightsql/types.go index d89e68f028bb8..c70a8bdc4ec26 100644 --- a/go/arrow/flight/flightsql/types.go +++ b/go/arrow/flight/flightsql/types.go @@ -852,3 +852,5 @@ const ( // cancellation request. CancelResultNotCancellable = pb.ActionCancelQueryResult_CANCEL_RESULT_NOT_CANCELLABLE ) + +type CreatePreparedStatementResult = pb.ActionCreatePreparedStatementResult From 66b41c48554cf79fb449fd6c627e44cd0a202cd8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 7 Feb 2024 20:21:07 -0400 Subject: [PATCH 62/74] GH-39738: [R] Support build against the last three released versions of Arrow (#39739) ### Rationale for this change Development velocity of the R package has slowed considerably since early versions of Arrow such that the commit-level integration that we once relied on is no longer necessary. The ability to build against older versions of Arrow also opens up more options for our CRAN submissions, since we may be able to work with CRAN to build a version of Arrow C++ they are happy with. This change doesn't require us to *do* anything about it...it just adds a check so that we are aware of the first PR that breaks the ability to build against a previous version. There is a possibility that an accidentally but previously installed version will end up being used via pkg-config, which I believe is how the version checking came into existence in the first place. ### What changes are included in this PR? - An `#if` to guard code that was added to support the string view/binary view - Changes to the version checker script to not error for supported Arrow C++ versions - CI job that checks build against supported Arrow versions ### Are these changes tested? Yes, a CI job was added ### Are there any user-facing changes? 
Yes, but I'll wait until there's consensus on this before documenting what our intended support policy will be. * Closes: #39738 Lead-authored-by: Dewey Dunnington Co-authored-by: Jacob Wujciak-Jens Co-authored-by: Dewey Dunnington Signed-off-by: Jacob Wujciak-Jens --- .github/workflows/r.yml | 57 +++++++++++++++++++++++++++++++++++ r/PACKAGING.md | 1 + r/src/r_to_arrow.cpp | 9 ++++++ r/tools/check-versions.R | 35 +++++++++++++-------- r/tools/test-check-versions.R | 40 ++++++++++++++++-------- 5 files changed, 116 insertions(+), 26 deletions(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 3d1f75ede4bb5..8c47915b7b6d3 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -54,6 +54,63 @@ env: DOCKER_VOLUME_PREFIX: ".docker/" jobs: + ubuntu-minimum-cpp-version: + name: Check minimum supported Arrow C++ Version (${{ matrix.cpp_version }}) + runs-on: ubuntu-latest + strategy: + matrix: + include: + - cpp_version: "13.0.0" + steps: + - name: Checkout Arrow + uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 + with: + path: src + submodules: recursive + + - name: Install Arrow C++ (${{ matrix.cpp_version }}) + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + # We have to list all packages to avoid version conflicts. + sudo apt install -y -V libarrow-dev=${{ matrix.cpp_version }}-1 \ + libarrow-acero-dev=${{ matrix.cpp_version }}-1 \ + libparquet-dev=${{ matrix.cpp_version }}-1 \ + libarrow-dataset-dev=${{ matrix.cpp_version }}-1 + + - name: Install checkbashisms + run: | + sudo apt-get install devscripts + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + install-r: false + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + working-directory: src/r + + - uses: r-lib/actions/check-r-package@v2 + with: + working-directory: src/r + env: + LIBARROW_BINARY: "false" + LIBARROW_BUILD: "false" + ARROW_R_VERBOSE_TEST: "true" + ARROW_R_ALLOW_CPP_VERSION_MISMATCH: "true" + + - name: Show install output + if: always() + run: find src/r/check -name '00install.out*' -exec cat '{}' \; || true + shell: bash + + ubuntu: name: AMD64 Ubuntu ${{ matrix.ubuntu }} R ${{ matrix.r }} Force-Tests ${{ matrix.force-tests }} runs-on: ubuntu-latest diff --git a/r/PACKAGING.md b/r/PACKAGING.md index 7f42ecf562e59..4edeb4f2130cc 100644 --- a/r/PACKAGING.md +++ b/r/PACKAGING.md @@ -26,6 +26,7 @@ For a high-level overview of the release process see the ## Before the release candidate is cut - [ ] [Create a GitHub issue](https://github.com/apache/arrow/issues/new/) entitled `[R] CRAN packaging checklist for version X.X.X` and copy this checklist to the issue. +- [ ] Review deprecated functions to advance their deprecation status, including removing preprocessor directives that no longer apply (search for `ARROW_VERSION_MAJOR` in r/src). - [ ] Evaluate the status of any failing [nightly tests and nightly packaging builds](http://crossbow.voltrondata.com). These checks replicate most of the checks that CRAN runs, so we need them all to be passing or to understand that the failures may (though won't necessarily) result in a rejection from CRAN. 
- [ ] Check [current CRAN check results](https://cran.rstudio.org/web/checks/check_results_arrow.html) - [ ] Ensure the contents of the README are accurate and up to date. diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index d2db11e14a787..a81210f0ad914 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1050,6 +1050,7 @@ class RDictionaryConverter> template struct RConverterTrait; +#if ARROW_VERSION_MAJOR >= 15 template struct RConverterTrait< T, enable_if_t::value && !is_interval_type::value && @@ -1061,6 +1062,14 @@ template struct RConverterTrait> { // not implemented }; +#else +template +struct RConverterTrait< + T, enable_if_t::value && !is_interval_type::value && + !is_extension_type::value>> { + using type = RPrimitiveConverter; +}; +#endif template struct RConverterTrait> { diff --git a/r/tools/check-versions.R b/r/tools/check-versions.R index 3d8cbf02a14c9..34b2ef680c547 100644 --- a/r/tools/check-versions.R +++ b/r/tools/check-versions.R @@ -20,6 +20,20 @@ args <- commandArgs(TRUE) # TESTING is set in test-check-version.R; it won't be set when called from configure test_mode <- exists("TESTING") +release_version_supported <- function(r_version, cpp_version) { + r_version <- package_version(r_version) + cpp_version <- package_version(cpp_version) + major <- function(x) as.numeric(x[1, 1]) + minimum_cpp_version <- package_version("13.0.0") + + allow_mismatch <- identical(tolower(Sys.getenv("ARROW_R_ALLOW_CPP_VERSION_MISMATCH", "false")), "true") + # If we allow a version mismatch we still need to cover the minimum version (13.0.0 for now) + # we don't allow newer C++ versions as new features without additional feature gates are likely to + # break the R package + version_valid <- cpp_version >= minimum_cpp_version && major(cpp_version) <= major(r_version) + allow_mismatch && version_valid || major(r_version) == major(cpp_version) +} + check_versions <- function(r_version, cpp_version) { r_parsed <- package_version(r_version) r_dev_version <- r_parsed[1, 4] @@ -39,20 +53,10 @@ check_versions <- function(r_version, cpp_version) { "*** > or retry with FORCE_BUNDLED_BUILD=true" ) cat(paste0(msg, "\n", collapse = "")) - } else if (r_is_patch && as.character(r_parsed[1, 1:3]) == cpp_version) { - # Patch releases we do for CRAN feedback get an extra x.y.z.1 version. - # These should work with the x.y.z C++ library (which never has .1 added) - cat( - sprintf( - "*** > Using C++ library version %s with R package %s\n", - cpp_version, - r_version - ) - ) - } else if (r_version != cpp_version) { + } else if (cpp_is_dev || !release_version_supported(r_version, cpp_parsed)) { cat( sprintf( - "**** Not using: C++ library version (%s) does not match R package (%s)\n", + "**** Not using: C++ library version (%s): not supported by R package version %s\n", cpp_version, r_version ) @@ -61,7 +65,12 @@ check_versions <- function(r_version, cpp_version) { # Add ALLOW_VERSION_MISMATCH env var to override stop()? 
(Could be useful for debugging) } else { # OK - cat(sprintf("**** C++ and R library versions match: %s\n", cpp_version)) + cat( + sprintf( + "**** C++ library version %s is supported by R version %s\n", + cpp_version, r_version + ) + ) } } diff --git a/r/tools/test-check-versions.R b/r/tools/test-check-versions.R index 9c284507b8801..f558648bed1e3 100644 --- a/r/tools/test-check-versions.R +++ b/r/tools/test-check-versions.R @@ -24,10 +24,10 @@ TESTING <- TRUE source("check-versions.R", local = TRUE) -test_that("check_versions", { +test_that("check_versions without mismatch", { expect_output( check_versions("10.0.0", "10.0.0"), - "**** C++ and R library versions match: 10.0.0", + "**** C++ library version 10.0.0 is supported by R version 10.0.0", fixed = TRUE ) expect_output( @@ -35,7 +35,7 @@ test_that("check_versions", { check_versions("10.0.0", "10.0.0-SNAPSHOT"), "version mismatch" ), - "**** Not using: C++ library version (10.0.0-SNAPSHOT) does not match R package (10.0.0)", + "**** Not using: C++ library version (10.0.0-SNAPSHOT): not supported by R package version 10.0.0", fixed = TRUE ) expect_output( @@ -43,20 +43,12 @@ test_that("check_versions", { check_versions("10.0.0.9000", "10.0.0-SNAPSHOT"), "version mismatch" ), - "**** Not using: C++ library version (10.0.0-SNAPSHOT) does not match R package (10.0.0.9000)", - fixed = TRUE - ) - expect_output( - expect_error( - check_versions("10.0.0.9000", "10.0.0"), - "version mismatch" - ), - "**** Not using: C++ library version (10.0.0) does not match R package (10.0.0.9000)", + "**** Not using: C++ library version (10.0.0-SNAPSHOT): not supported by R package version 10.0.0.9000", fixed = TRUE ) expect_output( check_versions("10.0.0.3", "10.0.0"), - "*** > Using C++ library version 10.0.0 with R package 10.0.0.3", + "**** C++ library version 10.0.0 is supported by R version 10.0.0.3", fixed = TRUE ) expect_output( @@ -65,3 +57,25 @@ test_that("check_versions", { fixed = TRUE ) }) + +test_that("check_versions with mismatch", { + withr::local_envvar(.new = c(ARROW_R_ALLOW_CPP_VERSION_MISMATCH = "false")) + + expect_false( + release_version_supported("15.0.0", "13.0.0") + ) + + withr::local_envvar(.new = c(ARROW_R_ALLOW_CPP_VERSION_MISMATCH = "true")) + + expect_true( + release_version_supported("15.0.0", "13.0.0") + ) + + expect_false( + release_version_supported("15.0.0", "16.0.0") + ) + + expect_false( + release_version_supported("15.0.0", "12.0.0") + ) +}) From e1241e74a92561d65c134c06b9d5a95deeb273f3 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 8 Feb 2024 01:37:12 +0100 Subject: [PATCH 63/74] GH-39987: [R] Make it possible to use a rtools libarrow on windows (#39986) This enables the use of libarrow from rtools. This is currently only possible by cross compiling manually but will be part of a future rtools version. These changes can't be tested, there are no user facing changes for now. * Closes: #39987 Lead-authored-by: Jacob Wujciak-Jens Co-authored-by: Neal Richardson Signed-off-by: Jacob Wujciak-Jens --- r/configure.win | 201 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 187 insertions(+), 14 deletions(-) diff --git a/r/configure.win b/r/configure.win index 2d9e5cdf54e44..b6ac19faea2d4 100755 --- a/r/configure.win +++ b/r/configure.win @@ -17,33 +17,58 @@ # specific language governing permissions and limitations # under the License. 
+: ${PKG_CONFIG:="pkg-config"} +# Library settings +PKG_CONFIG_NAME="arrow" +PKG_TEST_HEADER="" + +VERSION=`grep '^Version' DESCRIPTION | sed s/Version:\ //` + +# Development mode, also increases verbosity in the bundled build +ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'` +# If present, `pkg-config` will be used to find libarrow on the system, +# unless this is set to false +ARROW_USE_PKG_CONFIG=`echo $ARROW_USE_PKG_CONFIG | tr '[:upper:]' '[:lower:]'` # generate code -if [ "$ARROW_R_DEV" == "TRUE" ]; then +if [ "$ARROW_R_DEV" == "true" ]; then echo "*** Generating code with data-raw/codegen.R" "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" data-raw/codegen.R fi -OPENSSL_LIBS="-lcrypto -lcrypt32" -MIMALLOC_LIBS="-lbcrypt -lpsapi" -BROTLI_LIBS="-lbrotlienc -lbrotlidec -lbrotlicommon" # Common goes last since dec and enc depend on it -AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-management \ - -laws-cpp-sdk-cognito-identity -laws-cpp-sdk-sts -laws-cpp-sdk-s3 \ - -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common \ - -lUserenv -lversion -lws2_32 -lBcrypt -lWininet -lwinhttp" -# pkg-config --libs libcurl -GCS_LIBS="-lcurl -lnormaliz -lssh2 -lgdi32 -lssl -lcrypto -lcrypt32 -lwldap32 \ - -lz -lws2_32 -lnghttp2 -ldbghelp" +# Test if pkg-config is available to use +if ${PKG_CONFIG} --version >/dev/null 2>&1; then + PKG_CONFIG_AVAILABLE="true" + echo "*** pkg-config found." +else + echo "*** pkg-config not found." + PKG_CONFIG_AVAILABLE="false" + ARROW_USE_PKG_CONFIG="false" +fi -function configure_release() { - VERSION=$(grep ^Version DESCRIPTION | sed s/Version:\ //) + +function configure_binaries() { # Try to find/download a C++ Arrow binary, "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "tools/nixlibs.R" $VERSION # If binary not found, script exits nonzero if [ $? -ne 0 ]; then + _LIBARROW_FOUND="false" echo "Arrow C++ library was not found" + # return 0 so set -e doesn't exit the script + return 0 fi + OPENSSL_LIBS="-lcrypto -lcrypt32" + MIMALLOC_LIBS="-lbcrypt -lpsapi" + BROTLI_LIBS="-lbrotlienc -lbrotlidec -lbrotlicommon" # Common goes last since dec and enc depend on it + AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-management \ + -laws-cpp-sdk-cognito-identity -laws-cpp-sdk-sts -laws-cpp-sdk-s3 \ + -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common \ + -luserenv -lversion -lws2_32 -lbcrypt -lwininet -lwinhttp" + # pkg-config --libs libcurl + GCS_LIBS="-lcurl -lnormaliz -lssh2 -lgdi32 -lssl -lcrypto -lcrypt32 -lwldap32 \ + -lz -lws2_32 -lnghttp2 -ldbghelp" + # Set the right flags to point to and enable arrow/parquet if [ -d "windows/arrow-$VERSION" ]; then RWINLIB="../windows/arrow-$VERSION" @@ -75,12 +100,160 @@ function configure_release() { # It seems that order matters PKG_LIBS="${PKG_LIBS} -lws2_32" fi + +} + +# Once libarrow is obtained, this function sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` +# either from pkg-config or by inferring things about the directory in $1 +set_pkg_vars () { + set_lib_dir_with_pc + + # Check cmake options for enabled features. This uses LIB_DIR that + # is set by the above set_lib_dir_* call. + add_feature_flags + set_pkg_vars_with_pc + + # Set any user-defined CXXFLAGS + if [ "$ARROW_R_CXXFLAGS" ]; then + PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS" + fi + + # We use expr because the product version returns more than just 10.13 and we want to + # match the substring. 
However, expr always outputs the number of matched characters + # to stdout, to avoid noise in the log we redirect the output to /dev/null + if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13' >/dev/null 2>&1; then + # avoid C++17 availability warnings on macOS < 11 + PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY" + fi +} + +# If we have pkg-config, it will tell us what libarrow needs +set_lib_dir_with_pc () { + LIB_DIR="`${PKG_CONFIG} --variable=libdir ${PKG_CONFIG_NAME}`" +} +set_pkg_vars_with_pc () { + pkg_config_names="${PKG_CONFIG_NAME} ${PKG_CONFIG_NAMES_FEATURES}" + PKG_CFLAGS="`${PKG_CONFIG} --cflags ${pkg_config_names}` $PKG_CFLAGS" + PKG_CFLAGS="$PKG_CFLAGS $PKG_CFLAGS_FEATURES" + PKG_LIBS=`${PKG_CONFIG} --libs-only-l --libs-only-other ${pkg_config_names}` + PKG_LIBS="$PKG_LIBS $PKG_LIBS_FEATURES" + PKG_DIRS=`${PKG_CONFIG} --libs-only-L ${pkg_config_names}` +} + +add_feature_flags () { + PKG_CFLAGS_FEATURES="" + PKG_CONFIG_NAMES_FEATURES="" + PKG_LIBS_FEATURES="" + PKG_LIBS_FEATURES_WITHOUT_PC="" + + # Now we need to check what features it was built with and enable + # the corresponding feature flags in the R bindings (-DARROW_R_WITH_stuff). + # We do this by inspecting ArrowOptions.cmake, which the libarrow build + # generates. + ARROW_OPTS_CMAKE="$LIB_DIR/cmake/Arrow/ArrowOptions.cmake" + if [ ! -f "${ARROW_OPTS_CMAKE}" ]; then + echo "*** $ARROW_OPTS_CMAKE not found; some features will not be enabled" + else + if arrow_built_with ARROW_PARQUET; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_PARQUET" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES parquet" + PKG_LIBS_FEATURES_WITHOUT_PC="-lparquet $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: parquet is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS + fi + if arrow_built_with ARROW_DATASET; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_DATASET" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-dataset" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_dataset $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_dataset is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS + fi + if arrow_built_with ARROW_ACERO; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_ACERO" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-acero" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_acero $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_acero is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS + fi + if arrow_built_with ARROW_SUBSTRAIT; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_SUBSTRAIT" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-substrait" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_substrait $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_substrait is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS + fi + if arrow_built_with ARROW_JSON; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_JSON" + fi + if arrow_built_with ARROW_S3; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_S3" + fi + if arrow_built_with ARROW_GCS; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_GCS" + fi + if arrow_built_with ARROW_GCS || arrow_built_with ARROW_S3; then + # If pkg-config is available it will handle this for us automatically + SSL_LIBS_WITHOUT_PC="-lcurl -lssl -lcrypto" + fi + fi +} + + 
+arrow_built_with() { + # Function to check cmake options for features + grep -i 'set('"$1"' "ON")' $ARROW_OPTS_CMAKE >/dev/null 2>&1 +} + +function configure_rtools() { + # Use pkg-config to find arrow from rtools + _LIBARROW_PREFIX="`${PKG_CONFIG} --variable=prefix ${PKG_CONFIG_NAME}`" + _LIBARROW_FOUND="true" + echo "*** Trying Arrow C++ found by pkg-config: $_LIBARROW_PREFIX" + + PC_LIB_VERSION=`${PKG_CONFIG} --modversion ${PKG_CONFIG_NAME}` + # This is in an R script for convenience and testability. + # Success means the found C++ library is ok to use. + # Error means the versions don't line up and we shouldn't use it. + # More specific messaging to the user is in the R script + if ! ${R_HOME}/bin/Rscript tools/check-versions.R $VERSION $PC_LIB_VERSION 2> /dev/null; then + _LIBARROW_FOUND="false" + fi + + # We should have a valid libarrow build in $_LIBARROW_FOUND +# Now set `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` based on that. +if [ "$_LIBARROW_FOUND" == "true" ]; then + set_pkg_vars ${_LIBARROW_PREFIX} + # add mingw specific windows flags + PKG_LIBS="$PKG_LIBS -lws2_32 -lole32 -lwldap32 -lsecur32 -lncrypt -lcrypt32 -lshlwapi" + # override -fno-exceptions from aws-cpp-sdk pc file + PKG_CFLAGS="$PKG_CFLAGS -fexceptions" +else + # To make it easier to debug which code path was taken add a specific + # message to the log in addition to the 'NOTE' + echo "*** Failed to find Arrow C++ libraries in rtools" +fi +} + +function configure_release() { + if [ "$ARROW_USE_PKG_CONFIG" != "false" ] && $PKG_CONFIG --exists $PKG_CONFIG_NAME; then + configure_rtools + else + configure_binaries + fi + + if [ "$_LIBARROW_FOUND" == "false" ]; then + echo "------------------------- NOTE ---------------------------" + echo "There was an issue preparing the Arrow C++ libraries." + echo "See https://arrow.apache.org/docs/r/articles/install.html" + echo "----------------------------------------------------------" + exit 1 + fi } # Returns 1 if CMAKE options is set "ON", otherwise 0 function cmake_option() { ARROW_OPTS_CMAKE="$ARROW_HOME/lib/cmake/Arrow/ArrowOptions.cmake" - grep -cm1 "set($1 \"ON\")" $ARROW_OPTS_CMAKE + arrow_built_with $1 } function configure_dev() { From c38b0f33f3361350dd1321a93b53716e64489a69 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 8 Feb 2024 11:40:12 +0900 Subject: [PATCH 64/74] GH-39992: [CI][Docs][Java] ubuntu-docs uses Maven version in .env (#39993) ### Rationale for this change GH-39696 updated Maven version but `ubuntu-docs` haven't used it yet. ### What changes are included in this PR? Use `MAVEN` in `.env` in `ubuntu-docs`. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* Closes: #39992 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ci/docker/linux-apt-docs.dockerfile | 2 +- docker-compose.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index c51600a1e5920..3d102796b8c00 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -60,7 +60,7 @@ RUN apt-get update -y && \ ENV JAVA_HOME=/usr/lib/jvm/java-${jdk}-openjdk-amd64 -ARG maven=3.5.4 +ARG maven=3.6.3 COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/util_download_apache.sh \ "maven/maven-3/${maven}/binaries/apache-maven-${maven}-bin.tar.gz" /opt diff --git a/docker-compose.yml b/docker-compose.yml index 8a7223b57632f..a31fa0d9aa659 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1740,6 +1740,7 @@ services: args: r: ${R} jdk: ${JDK} + maven: ${MAVEN} node: ${NODE} base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 environment: From fa26fa56ce579f52de8f1fa07dfb103a122e2d8a Mon Sep 17 00:00:00 2001 From: av8or1 Date: Wed, 7 Feb 2024 23:53:01 -0600 Subject: [PATCH 65/74] GH-38703: [C++][FS][Azure] Implement DeleteFile() (#39840) ### Rationale for this change `DeleteFile()` API isn't implemented yet. ### What changes are included in this PR? Implement `DeleteFile()` by the "Delete Blob" API: https://learn.microsoft.com/en-us/rest/api/storageservices/delete-blob ### Are these changes tested? I tested the modification by creating a file via the web browser on our internal ADLS, then ran a sample program that deleted the file. I added three regression tests to cover the use case scenarios of: * A valid delete attempt, where "valid" means that the file exists and is indeed a file * An intentional failure where a file delete is attempted, but the file does not exist * An intentional failure where a file delete is attempted, but the target is a container * An intentional failure where a file delete is attempted, but the target is a directory ### Are there any user-facing changes? Yes. * Closes: #38703 Lead-authored-by: av8or1 Co-authored-by: jerry.adair Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/azurefs.cc | 23 ++++++++++++++++- cpp/src/arrow/filesystem/azurefs_test.cc | 33 +++++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index a5179c22190e1..87b9822878cce 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1690,6 +1690,26 @@ class AzureFileSystem::Impl { } } + Status DeleteFile(const AzureLocation& location) { + RETURN_NOT_OK(ValidateFileLocation(location)); + auto file_client = datalake_service_client_->GetFileSystemClient(location.container) + .GetFileClient(location.path); + try { + auto response = file_client.Delete(); + // Only the "*IfExists" functions ever set Deleted to false. + // All the others either succeed or throw an exception. 
+ DCHECK(response.Value.Deleted); + } catch (const Storage::StorageException& exception) { + if (exception.ErrorCode == "FilesystemNotFound" || + exception.ErrorCode == "PathNotFound") { + return PathNotFound(location); + } + return ExceptionToStatus(exception, "Failed to delete a file: ", location.path, + ": ", file_client.GetUrl()); + } + return Status::OK(); + } + Status CopyFile(const AzureLocation& src, const AzureLocation& dest) { RETURN_NOT_OK(ValidateFileLocation(src)); RETURN_NOT_OK(ValidateFileLocation(dest)); @@ -1875,7 +1895,8 @@ Status AzureFileSystem::DeleteRootDirContents() { } Status AzureFileSystem::DeleteFile(const std::string& path) { - return Status::NotImplemented("The Azure FileSystem is not fully implemented"); + ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); + return impl_->DeleteFile(location); } Status AzureFileSystem::Move(const std::string& src, const std::string& dest) { diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 6104b04411b32..4d123028ea86e 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -1382,6 +1382,38 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsFailureNonexistent) { this->TestDeleteDirContentsFailureNonexistent(); } +TEST_F(TestAzuriteFileSystem, DeleteFileSuccess) { + const auto container_name = PreexistingData::RandomContainerName(rng_); + ASSERT_OK(fs()->CreateDir(container_name)); + const auto file_name = ConcatAbstractPath(container_name, "abc"); + CreateFile(fs(), file_name, "data"); + arrow::fs::AssertFileInfo(fs(), file_name, FileType::File); + ASSERT_OK(fs()->DeleteFile(file_name)); + arrow::fs::AssertFileInfo(fs(), file_name, FileType::NotFound); +} + +TEST_F(TestAzuriteFileSystem, DeleteFileFailureNonexistent) { + const auto container_name = PreexistingData::RandomContainerName(rng_); + ASSERT_OK(fs()->CreateDir(container_name)); + const auto nonexistent_file_name = ConcatAbstractPath(container_name, "nonexistent"); + ASSERT_RAISES(IOError, fs()->DeleteFile(nonexistent_file_name)); +} + +TEST_F(TestAzuriteFileSystem, DeleteFileFailureContainer) { + const auto container_name = PreexistingData::RandomContainerName(rng_); + ASSERT_OK(fs()->CreateDir(container_name)); + arrow::fs::AssertFileInfo(fs(), container_name, FileType::Directory); + ASSERT_RAISES(IOError, fs()->DeleteFile(container_name)); +} + +TEST_F(TestAzuriteFileSystem, DeleteFileFailureDirectory) { + const auto directory_name = + ConcatAbstractPath(PreexistingData::RandomContainerName(rng_), "directory"); + ASSERT_OK(fs()->CreateDir(directory_name)); + arrow::fs::AssertFileInfo(fs(), directory_name, FileType::Directory); + ASSERT_RAISES(IOError, fs()->DeleteFile(directory_name)); +} + TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); @@ -1868,6 +1900,5 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) { ASSERT_RAISES(Invalid, stream->ReadAt(1, 1)); ASSERT_RAISES(Invalid, stream->Seek(2)); } - } // namespace fs } // namespace arrow From 026188e3bb36c58573f23215aedc14e6392264c2 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 8 Feb 2024 12:25:38 +0100 Subject: [PATCH 66/74] GH-37484: [Python] Add a FixedSizeTensorScalar class (#37533) ### Rationale for this change When working with `FixedSizeTensorArray` we want to access individual tensors. 
This would be enabled by adding: ```python def FixedSizeTensorScalar(pa.ExtensionScalar): def to_numpy_ndarray(): ... ``` See #37484. ### What changes are included in this PR? This adds `FixedSizeTensorScalar` and tests for it. ### Are there any user-facing changes? Yes, when calling `FixedSizeTensorArray[i]` we would get back `FixedSizeTensorScalar` instead of `ExtensionScalar`. * Closes: #37484 Lead-authored-by: Rok Mihevc Co-authored-by: Antoine Pitrou Co-authored-by: Joris Van den Bossche Co-authored-by: Alenka Frim Signed-off-by: Antoine Pitrou --- cpp/src/arrow/extension/fixed_shape_tensor.cc | 130 +++++++--- cpp/src/arrow/extension/fixed_shape_tensor.h | 11 +- .../extension/fixed_shape_tensor_test.cc | 226 ++++++++++++++++-- cpp/src/arrow/extension/tensor_internal.h | 45 ++++ python/pyarrow/array.pxi | 77 ++++-- python/pyarrow/includes/libarrow.pxd | 12 +- python/pyarrow/scalar.pxi | 42 ++++ python/pyarrow/tests/test_extension_type.py | 123 ++++++++-- python/pyarrow/types.pxi | 22 +- 9 files changed, 566 insertions(+), 122 deletions(-) create mode 100644 cpp/src/arrow/extension/tensor_internal.h diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.cc b/cpp/src/arrow/extension/fixed_shape_tensor.cc index af8305a025291..02e0a890e4b3d 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor.cc +++ b/cpp/src/arrow/extension/fixed_shape_tensor.cc @@ -19,6 +19,8 @@ #include #include "arrow/extension/fixed_shape_tensor.h" +#include "arrow/extension/tensor_internal.h" +#include "arrow/scalar.h" #include "arrow/array/array_nested.h" #include "arrow/array/array_primitive.h" @@ -86,7 +88,7 @@ bool FixedShapeTensorType::ExtensionEquals(const ExtensionType& other) const { if (extension_name() != other.extension_name()) { return false; } - const auto& other_ext = static_cast(other); + const auto& other_ext = internal::checked_cast(other); auto is_permutation_trivial = [](const std::vector& permutation) { for (size_t i = 1; i < permutation.size(); ++i) { @@ -143,7 +145,7 @@ std::string FixedShapeTensorType::Serialize() const { if (!dim_names_.empty()) { rj::Value dim_names(rj::kArrayType); - for (std::string v : dim_names_) { + for (const std::string& v : dim_names_) { dim_names.PushBack(rj::Value{}.SetString(v.c_str(), allocator), allocator); } document.AddMember(rj::Value("dim_names", allocator), dim_names, allocator); @@ -199,10 +201,52 @@ std::shared_ptr FixedShapeTensorType::MakeArray( std::shared_ptr data) const { DCHECK_EQ(data->type->id(), Type::EXTENSION); DCHECK_EQ("arrow.fixed_shape_tensor", - static_cast(*data->type).extension_name()); + internal::checked_cast(*data->type).extension_name()); return std::make_shared(data); } +Result> FixedShapeTensorType::MakeTensor( + const std::shared_ptr& scalar) { + const auto ext_scalar = internal::checked_pointer_cast(scalar); + const auto ext_type = + internal::checked_pointer_cast(scalar->type); + if (!is_fixed_width(*ext_type->value_type())) { + return Status::TypeError("Cannot convert non-fixed-width values to Tensor."); + } + const auto array = + internal::checked_pointer_cast(ext_scalar->value)->value; + if (array->null_count() > 0) { + return Status::Invalid("Cannot convert data with nulls to Tensor."); + } + const auto value_type = + internal::checked_pointer_cast(ext_type->value_type()); + const auto byte_width = value_type->byte_width(); + + std::vector permutation = ext_type->permutation(); + if (permutation.empty()) { + permutation.resize(ext_type->ndim()); + std::iota(permutation.begin(), permutation.end(), 0); + } + + std::vector 
shape = ext_type->shape(); + internal::Permute(permutation, &shape); + + std::vector dim_names = ext_type->dim_names(); + if (!dim_names.empty()) { + internal::Permute(permutation, &dim_names); + } + + std::vector strides; + RETURN_NOT_OK(ComputeStrides(*value_type.get(), shape, permutation, &strides)); + const auto start_position = array->offset() * byte_width; + const auto size = std::accumulate(shape.begin(), shape.end(), static_cast(1), + std::multiplies<>()); + const auto buffer = + SliceBuffer(array->data()->buffers[1], start_position, size * byte_width); + + return Tensor::Make(ext_type->value_type(), buffer, shape, strides, dim_names); +} + Result> FixedShapeTensorArray::FromTensor( const std::shared_ptr& tensor) { auto permutation = internal::ArgSort(tensor->strides(), std::greater<>()); @@ -293,53 +337,71 @@ const Result> FixedShapeTensorArray::ToTensor() const { // To convert an array of n dimensional tensors to a n+1 dimensional tensor we // interpret the array's length as the first dimension the new tensor. - auto ext_arr = std::static_pointer_cast(this->storage()); - auto ext_type = internal::checked_pointer_cast(this->type()); - ARROW_RETURN_IF(!is_fixed_width(*ext_arr->value_type()), - Status::Invalid(ext_arr->value_type()->ToString(), - " is not valid data type for a tensor")); - auto permutation = ext_type->permutation(); - - std::vector dim_names; - if (!ext_type->dim_names().empty()) { - for (auto i : permutation) { - dim_names.emplace_back(ext_type->dim_names()[i]); - } - dim_names.insert(dim_names.begin(), 1, ""); + const auto ext_type = + internal::checked_pointer_cast(this->type()); + const auto value_type = ext_type->value_type(); + ARROW_RETURN_IF( + !is_fixed_width(*value_type), + Status::TypeError(value_type->ToString(), " is not valid data type for a tensor")); + + // ext_type->permutation() gives us permutation for a single row with values in + // range [0, ndim). Here want to create a ndim + 1 dimensional tensor from the entire + // array and we assume the first dimension will always have the greatest stride, so it + // will get permutation index 0 and remaining values from ext_type->permutation() need + // to be shifted to fill the [1, ndim+1) range. Computed permutation will be used to + // generate the new tensor's shape, strides and dim_names. 
+ std::vector permutation = ext_type->permutation(); + if (permutation.empty()) { + permutation.resize(ext_type->ndim() + 1); + std::iota(permutation.begin(), permutation.end(), 0); } else { - dim_names = {}; + for (auto i = 0; i < static_cast(ext_type->ndim()); i++) { + permutation[i] += 1; + } + permutation.insert(permutation.begin(), 1, 0); } - std::vector shape; - for (int64_t& i : permutation) { - shape.emplace_back(ext_type->shape()[i]); - ++i; + std::vector dim_names = ext_type->dim_names(); + if (!dim_names.empty()) { + dim_names.insert(dim_names.begin(), 1, ""); + internal::Permute(permutation, &dim_names); } + + std::vector shape = ext_type->shape(); + auto cell_size = std::accumulate(shape.begin(), shape.end(), static_cast(1), + std::multiplies<>()); shape.insert(shape.begin(), 1, this->length()); - permutation.insert(permutation.begin(), 1, 0); + internal::Permute(permutation, &shape); std::vector tensor_strides; - auto value_type = internal::checked_pointer_cast(ext_arr->value_type()); + const auto fw_value_type = internal::checked_pointer_cast(value_type); ARROW_RETURN_NOT_OK( - ComputeStrides(*value_type.get(), shape, permutation, &tensor_strides)); - ARROW_ASSIGN_OR_RAISE(auto buffers, ext_arr->Flatten()); + ComputeStrides(*fw_value_type.get(), shape, permutation, &tensor_strides)); + + const auto raw_buffer = this->storage()->data()->child_data[0]->buffers[1]; ARROW_ASSIGN_OR_RAISE( - auto tensor, Tensor::Make(ext_arr->value_type(), buffers->data()->buffers[1], shape, - tensor_strides, dim_names)); - return tensor; + const auto buffer, + SliceBufferSafe(raw_buffer, this->offset() * cell_size * value_type->byte_width())); + + return Tensor::Make(value_type, buffer, shape, tensor_strides, dim_names); } Result> FixedShapeTensorType::Make( const std::shared_ptr& value_type, const std::vector& shape, const std::vector& permutation, const std::vector& dim_names) { - if (!permutation.empty() && shape.size() != permutation.size()) { - return Status::Invalid("permutation size must match shape size. Expected: ", - shape.size(), " Got: ", permutation.size()); + const auto ndim = shape.size(); + if (!permutation.empty() && ndim != permutation.size()) { + return Status::Invalid("permutation size must match shape size. Expected: ", ndim, + " Got: ", permutation.size()); + } + if (!dim_names.empty() && ndim != dim_names.size()) { + return Status::Invalid("dim_names size must match shape size. Expected: ", ndim, + " Got: ", dim_names.size()); } - if (!dim_names.empty() && shape.size() != dim_names.size()) { - return Status::Invalid("dim_names size must match shape size. 
Expected: ", - shape.size(), " Got: ", dim_names.size()); + if (!permutation.empty()) { + RETURN_NOT_OK(internal::IsPermutationValid(permutation)); } + const auto size = std::accumulate(shape.begin(), shape.end(), static_cast(1), std::multiplies<>()); return std::make_shared(value_type, static_cast(size), diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.h b/cpp/src/arrow/extension/fixed_shape_tensor.h index fcfb1ebbab96a..591a7cee32a34 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor.h +++ b/cpp/src/arrow/extension/fixed_shape_tensor.h @@ -64,7 +64,7 @@ class ARROW_EXPORT FixedShapeTensorType : public ExtensionType { std::string ToString() const override; /// Number of dimensions of tensor elements - size_t ndim() { return shape_.size(); } + size_t ndim() const { return shape_.size(); } /// Shape of tensor elements const std::vector shape() const { return shape_; } @@ -94,6 +94,15 @@ class ARROW_EXPORT FixedShapeTensorType : public ExtensionType { /// Create a FixedShapeTensorArray from ArrayData std::shared_ptr MakeArray(std::shared_ptr data) const override; + /// \brief Create a Tensor from an ExtensionScalar from a FixedShapeTensorArray + /// + /// This method will return a Tensor from ExtensionScalar with strides + /// derived from shape and permutation of FixedShapeTensorType. Shape and + /// dim_names will be permuted according to permutation stored in the + /// FixedShapeTensorType metadata. + static Result> MakeTensor( + const std::shared_ptr& scalar); + /// \brief Create a FixedShapeTensorType instance static Result> Make( const std::shared_ptr& value_type, const std::vector& shape, diff --git a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc index 2b8e703d3c66e..3fd39a11ff50d 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc +++ b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc @@ -28,6 +28,7 @@ #include "arrow/tensor.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/key_value_metadata.h" +#include "arrow/util/sort.h" namespace arrow { @@ -39,34 +40,34 @@ class TestExtensionType : public ::testing::Test { public: void SetUp() override { shape_ = {3, 3, 4}; - cell_shape_ = {3, 4}; + element_shape_ = {3, 4}; value_type_ = int64(); - cell_type_ = fixed_size_list(value_type_, 12); + element_type_ = fixed_size_list(value_type_, 12); dim_names_ = {"x", "y"}; ext_type_ = internal::checked_pointer_cast( - fixed_shape_tensor(value_type_, cell_shape_, {}, dim_names_)); + fixed_shape_tensor(value_type_, element_shape_, {}, dim_names_)); values_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; values_partial_ = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; shape_partial_ = {2, 3, 4}; tensor_strides_ = {96, 32, 8}; - cell_strides_ = {32, 8}; + element_strides_ = {32, 8}; serialized_ = R"({"shape":[3,4],"dim_names":["x","y"]})"; } protected: std::vector shape_; std::vector shape_partial_; - std::vector cell_shape_; + std::vector element_shape_; std::shared_ptr value_type_; - std::shared_ptr cell_type_; + std::shared_ptr element_type_; std::vector dim_names_; std::shared_ptr ext_type_; std::vector values_; std::vector values_partial_; std::vector tensor_strides_; - std::vector cell_strides_; + std::vector element_strides_; std::string serialized_; }; @@ -96,8 +97,8 @@ TEST_F(TestExtensionType, CreateExtensionType) { // Test ExtensionType methods 
ASSERT_EQ(ext_type_->extension_name(), "arrow.fixed_shape_tensor"); ASSERT_TRUE(ext_type_->Equals(*exact_ext_type)); - ASSERT_FALSE(ext_type_->Equals(*cell_type_)); - ASSERT_TRUE(ext_type_->storage_type()->Equals(*cell_type_)); + ASSERT_FALSE(ext_type_->Equals(*element_type_)); + ASSERT_TRUE(ext_type_->storage_type()->Equals(*element_type_)); ASSERT_EQ(ext_type_->Serialize(), serialized_); ASSERT_OK_AND_ASSIGN(auto ds, ext_type_->Deserialize(ext_type_->storage_type(), serialized_)); @@ -106,18 +107,28 @@ TEST_F(TestExtensionType, CreateExtensionType) { // Test FixedShapeTensorType methods ASSERT_EQ(exact_ext_type->id(), Type::EXTENSION); - ASSERT_EQ(exact_ext_type->ndim(), cell_shape_.size()); - ASSERT_EQ(exact_ext_type->shape(), cell_shape_); + ASSERT_EQ(exact_ext_type->ndim(), element_shape_.size()); + ASSERT_EQ(exact_ext_type->shape(), element_shape_); ASSERT_EQ(exact_ext_type->value_type(), value_type_); - ASSERT_EQ(exact_ext_type->strides(), cell_strides_); + ASSERT_EQ(exact_ext_type->strides(), element_strides_); ASSERT_EQ(exact_ext_type->dim_names(), dim_names_); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, testing::HasSubstr("Invalid: permutation size must match shape size."), - FixedShapeTensorType::Make(value_type_, cell_shape_, {0})); + FixedShapeTensorType::Make(value_type_, element_shape_, {0})); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, testing::HasSubstr("Invalid: dim_names size must match shape size."), - FixedShapeTensorType::Make(value_type_, cell_shape_, {}, {"x"})); + FixedShapeTensorType::Make(value_type_, element_shape_, {}, {"x"})); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, + testing::HasSubstr("Invalid: Permutation indices for 2 dimensional tensors must be " + "unique and within [0, 1] range. Got: [3,0]"), + FixedShapeTensorType::Make(value_type_, {5, 6}, {3, 0})); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, + testing::HasSubstr("Invalid: Permutation indices for 3 dimensional tensors must be " + "unique and within [0, 2] range. 
Got: [0,1,1]"), + FixedShapeTensorType::Make(value_type_, {1, 2, 3}, {0, 1, 1})); } TEST_F(TestExtensionType, EqualsCases) { @@ -148,7 +159,7 @@ TEST_F(TestExtensionType, CreateFromArray) { std::vector> buffers = {nullptr, Buffer::Wrap(values_)}; auto arr_data = std::make_shared(value_type_, values_.size(), buffers, 0, 0); auto arr = std::make_shared(arr_data); - ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, cell_type_)); + ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, element_type_)); auto ext_arr = ExtensionType::WrapArray(ext_type_, fsla_arr); ASSERT_EQ(ext_arr->length(), shape_[0]); ASSERT_EQ(ext_arr->null_count(), 0); @@ -200,7 +211,7 @@ TEST_F(TestExtensionType, RoundtripBatch) { std::vector> buffers = {nullptr, Buffer::Wrap(values_)}; auto arr_data = std::make_shared(value_type_, values_.size(), buffers, 0, 0); auto arr = std::make_shared(arr_data); - ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, cell_type_)); + ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, element_type_)); auto ext_arr = ExtensionType::WrapArray(ext_type_, fsla_arr); // Pass extension array, expect getting back extension array @@ -215,7 +226,7 @@ TEST_F(TestExtensionType, RoundtripBatch) { auto ext_metadata = key_value_metadata({{"ARROW:extension:name", exact_ext_type->extension_name()}, {"ARROW:extension:metadata", serialized_}}); - ext_field = field(/*name=*/"f0", /*type=*/cell_type_, /*nullable=*/true, + ext_field = field(/*name=*/"f0", /*type=*/element_type_, /*nullable=*/true, /*metadata=*/ext_metadata); auto batch2 = RecordBatch::Make(schema({ext_field}), fsla_arr->length(), {fsla_arr}); RoundtripBatch(batch2, &read_batch2); @@ -270,7 +281,7 @@ TEST_F(TestExtensionType, CreateFromTensor) { auto ext_arr_5 = std::static_pointer_cast( ExtensionType::WrapArray(ext_type_5, fsla_arr)); EXPECT_RAISES_WITH_MESSAGE_THAT( - Invalid, testing::HasSubstr("binary is not valid data type for a tensor"), + TypeError, testing::HasSubstr("binary is not valid data type for a tensor"), ext_arr_5->ToTensor()); auto ext_type_6 = internal::checked_pointer_cast( @@ -278,6 +289,10 @@ TEST_F(TestExtensionType, CreateFromTensor) { auto arr_with_null = ArrayFromJSON(int64(), "[1, 0, null, null, 1, 2]"); ASSERT_OK_AND_ASSIGN(auto fsla_arr_6, FixedSizeListArray::FromArrays( arr_with_null, fixed_size_list(int64(), 2))); + + auto ext_type_7 = internal::checked_pointer_cast( + fixed_shape_tensor(int64(), {3, 4}, {})); + ASSERT_OK_AND_ASSIGN(auto ext_arr_7, FixedShapeTensorArray::FromTensor(tensor)); } void CheckFromTensorType(const std::shared_ptr& tensor, @@ -308,7 +323,7 @@ TEST_F(TestExtensionType, TestFromTensorType) { auto dim_names = std::vector>{ {"y", "z"}, {"z", "y"}, {"y", "z"}, {"z", "y"}, {"y", "z"}, {"y", "z"}, {"y", "z"}, {"y", "z"}}; - auto cell_shapes = std::vector>{{3, 4}, {4, 3}, {4, 3}, {3, 4}}; + auto element_shapes = std::vector>{{3, 4}, {4, 3}, {4, 3}, {3, 4}}; auto permutations = std::vector>{{0, 1}, {1, 0}, {0, 1}, {1, 0}}; for (size_t i = 0; i < shapes.size(); i++) { @@ -316,11 +331,82 @@ TEST_F(TestExtensionType, TestFromTensorType) { strides[i], tensor_dim_names[i])); ASSERT_OK_AND_ASSIGN(auto ext_arr, FixedShapeTensorArray::FromTensor(tensor)); auto ext_type = - fixed_shape_tensor(value_type_, cell_shapes[i], permutations[i], dim_names[i]); + fixed_shape_tensor(value_type_, element_shapes[i], permutations[i], dim_names[i]); CheckFromTensorType(tensor, ext_type); } } +template +void CheckToTensor(const 
std::vector& values, const std::shared_ptr typ, + const int32_t& element_size, const std::vector& element_shape, + const std::vector& element_permutation, + const std::vector& element_dim_names, + const std::vector& tensor_shape, + const std::vector& tensor_dim_names, + const std::vector& tensor_strides) { + auto buffer = Buffer::Wrap(values); + const std::shared_ptr element_type = fixed_size_list(typ, element_size); + std::vector> buffers = {nullptr, buffer}; + auto arr_data = std::make_shared(typ, values.size(), buffers); + auto arr = std::make_shared(arr_data); + ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, element_type)); + + ASSERT_OK_AND_ASSIGN( + auto expected_tensor, + Tensor::Make(typ, buffer, tensor_shape, tensor_strides, tensor_dim_names)); + const auto ext_type = + fixed_shape_tensor(typ, element_shape, element_permutation, element_dim_names); + + auto ext_arr = ExtensionType::WrapArray(ext_type, fsla_arr); + const auto tensor_array = std::static_pointer_cast(ext_arr); + ASSERT_OK_AND_ASSIGN(const auto actual_tensor, tensor_array->ToTensor()); + ASSERT_OK(actual_tensor->Validate()); + + ASSERT_EQ(actual_tensor->type(), expected_tensor->type()); + ASSERT_EQ(actual_tensor->shape(), expected_tensor->shape()); + ASSERT_EQ(actual_tensor->strides(), expected_tensor->strides()); + ASSERT_EQ(actual_tensor->dim_names(), expected_tensor->dim_names()); + ASSERT_TRUE(actual_tensor->data()->Equals(*expected_tensor->data())); + ASSERT_TRUE(actual_tensor->Equals(*expected_tensor)); +} + +TEST_F(TestExtensionType, ToTensor) { + std::vector float_values = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + + auto element_sizes = std::vector{6, 6, 18, 18, 18, 18}; + + auto element_shapes = std::vector>{{2, 3}, {3, 2}, {3, 6}, + {6, 3}, {3, 2, 3}, {3, 2, 3}}; + auto tensor_shapes = std::vector>{ + {6, 2, 3}, {6, 2, 3}, {2, 3, 6}, {2, 3, 6}, {2, 3, 2, 3}, {2, 3, 2, 3}}; + + auto element_permutations = std::vector>{ + {0, 1}, {1, 0}, {0, 1}, {1, 0}, {0, 1, 2}, {2, 1, 0}}; + auto tensor_strides_32 = + std::vector>{{24, 12, 4}, {24, 4, 8}, {72, 24, 4}, + {72, 4, 12}, {72, 24, 12, 4}, {72, 4, 12, 24}}; + auto tensor_strides_64 = + std::vector>{{48, 24, 8}, {48, 8, 16}, {144, 48, 8}, + {144, 8, 24}, {144, 48, 24, 8}, {144, 8, 24, 48}}; + + auto element_dim_names = std::vector>{ + {"y", "z"}, {"z", "y"}, {"y", "z"}, {"z", "y"}, {"H", "W", "C"}, {"H", "W", "C"}}; + auto tensor_dim_names = std::vector>{ + {"", "y", "z"}, {"", "y", "z"}, {"", "y", "z"}, + {"", "y", "z"}, {"", "H", "W", "C"}, {"", "C", "W", "H"}}; + + for (size_t i = 0; i < element_shapes.size(); i++) { + CheckToTensor(float_values, float32(), element_sizes[i], element_shapes[i], + element_permutations[i], element_dim_names[i], tensor_shapes[i], + tensor_dim_names[i], tensor_strides_32[i]); + CheckToTensor(values_, int64(), element_sizes[i], element_shapes[i], + element_permutations[i], element_dim_names[i], tensor_shapes[i], + tensor_dim_names[i], tensor_strides_64[i]); + } +} + void CheckTensorRoundtrip(const std::shared_ptr& tensor) { ASSERT_OK_AND_ASSIGN(auto ext_arr, FixedShapeTensorArray::FromTensor(tensor)); ASSERT_OK_AND_ASSIGN(auto tensor_from_array, ext_arr->ToTensor()); @@ -364,7 +450,7 @@ TEST_F(TestExtensionType, SliceTensor) { Tensor::Make(value_type_, Buffer::Wrap(values_partial_), shape_partial_)); ASSERT_EQ(tensor->strides(), tensor_strides_); ASSERT_EQ(tensor_partial->strides(), tensor_strides_); - auto 
ext_type = fixed_shape_tensor(value_type_, cell_shape_, {}, dim_names_); + auto ext_type = fixed_shape_tensor(value_type_, element_shape_, {}, dim_names_); auto exact_ext_type = internal::checked_pointer_cast(ext_type_); ASSERT_OK_AND_ASSIGN(auto ext_arr, FixedShapeTensorArray::FromTensor(tensor)); @@ -404,11 +490,11 @@ TEST_F(TestExtensionType, ComputeStrides) { auto exact_ext_type = internal::checked_pointer_cast(ext_type_); auto ext_type_1 = internal::checked_pointer_cast( - fixed_shape_tensor(int64(), cell_shape_, {}, dim_names_)); + fixed_shape_tensor(int64(), element_shape_, {}, dim_names_)); auto ext_type_2 = internal::checked_pointer_cast( - fixed_shape_tensor(int64(), cell_shape_, {}, dim_names_)); + fixed_shape_tensor(int64(), element_shape_, {}, dim_names_)); auto ext_type_3 = internal::checked_pointer_cast( - fixed_shape_tensor(int32(), cell_shape_, {}, dim_names_)); + fixed_shape_tensor(int32(), element_shape_, {}, dim_names_)); ASSERT_TRUE(ext_type_1->Equals(*ext_type_2)); ASSERT_FALSE(ext_type_1->Equals(*ext_type_3)); @@ -462,4 +548,96 @@ TEST_F(TestExtensionType, ToString) { ASSERT_EQ(expected_3, result_3); } +TEST_F(TestExtensionType, GetTensor) { + auto arr = ArrayFromJSON(element_type_, + "[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]," + "[12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]]"); + auto element_values = + std::vector>{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}}; + + auto ext_type = fixed_shape_tensor(value_type_, element_shape_, {}, dim_names_); + auto permuted_ext_type = fixed_shape_tensor(value_type_, {3, 4}, {1, 0}, {"x", "y"}); + auto exact_ext_type = internal::checked_pointer_cast(ext_type); + auto exact_permuted_ext_type = + internal::checked_pointer_cast(permuted_ext_type); + + auto array = std::static_pointer_cast( + ExtensionType::WrapArray(ext_type, arr)); + auto permuted_array = std::static_pointer_cast( + ExtensionType::WrapArray(permuted_ext_type, arr)); + + for (size_t i = 0; i < element_values.size(); i++) { + // Get tensor from extension array with trivial permutation + ASSERT_OK_AND_ASSIGN(auto scalar, array->GetScalar(i)); + auto actual_ext_scalar = internal::checked_pointer_cast(scalar); + ASSERT_OK_AND_ASSIGN(auto actual_tensor, + exact_ext_type->MakeTensor(actual_ext_scalar)); + ASSERT_OK(actual_tensor->Validate()); + ASSERT_OK_AND_ASSIGN(auto expected_tensor, + Tensor::Make(value_type_, Buffer::Wrap(element_values[i]), + {3, 4}, {}, {"x", "y"})); + ASSERT_EQ(expected_tensor->shape(), actual_tensor->shape()); + ASSERT_EQ(expected_tensor->dim_names(), actual_tensor->dim_names()); + ASSERT_EQ(expected_tensor->strides(), actual_tensor->strides()); + ASSERT_EQ(actual_tensor->strides(), std::vector({32, 8})); + ASSERT_EQ(expected_tensor->type(), actual_tensor->type()); + ASSERT_TRUE(expected_tensor->Equals(*actual_tensor)); + + // Get tensor from extension array with non-trivial permutation + ASSERT_OK_AND_ASSIGN(auto expected_permuted_tensor, + Tensor::Make(value_type_, Buffer::Wrap(element_values[i]), + {4, 3}, {8, 24}, {"y", "x"})); + ASSERT_OK_AND_ASSIGN(scalar, permuted_array->GetScalar(i)); + ASSERT_OK_AND_ASSIGN(auto actual_permuted_tensor, + exact_permuted_ext_type->MakeTensor( + internal::checked_pointer_cast(scalar))); + ASSERT_OK(actual_permuted_tensor->Validate()); + ASSERT_EQ(expected_permuted_tensor->strides(), actual_permuted_tensor->strides()); + ASSERT_EQ(expected_permuted_tensor->shape(), actual_permuted_tensor->shape()); + ASSERT_EQ(expected_permuted_tensor->dim_names(), 
actual_permuted_tensor->dim_names()); + ASSERT_EQ(expected_permuted_tensor->type(), actual_permuted_tensor->type()); + ASSERT_EQ(expected_permuted_tensor->is_contiguous(), + actual_permuted_tensor->is_contiguous()); + ASSERT_EQ(expected_permuted_tensor->is_column_major(), + actual_permuted_tensor->is_column_major()); + ASSERT_TRUE(expected_permuted_tensor->Equals(*actual_permuted_tensor)); + } + + // Test null values fail + auto element_type = fixed_size_list(int64(), 1); + auto fsla_arr = ArrayFromJSON(element_type, "[[1], [null], null]"); + ext_type = fixed_shape_tensor(int64(), {1}); + exact_ext_type = internal::checked_pointer_cast(ext_type); + auto ext_arr = ExtensionType::WrapArray(ext_type, fsla_arr); + auto tensor_array = internal::checked_pointer_cast(ext_arr); + + ASSERT_OK_AND_ASSIGN(auto scalar, tensor_array->GetScalar(0)); + ASSERT_OK_AND_ASSIGN(auto tensor, + exact_ext_type->MakeTensor( + internal::checked_pointer_cast(scalar))); + + ASSERT_OK_AND_ASSIGN(scalar, tensor_array->GetScalar(1)); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, testing::HasSubstr("Invalid: Cannot convert data with nulls to Tensor."), + exact_ext_type->MakeTensor( + internal::checked_pointer_cast(scalar))); + + ASSERT_OK_AND_ASSIGN(scalar, tensor_array->GetScalar(2)); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, testing::HasSubstr("Invalid: Cannot convert data with nulls to Tensor."), + exact_ext_type->MakeTensor( + internal::checked_pointer_cast(scalar))); + + element_type = list(utf8()); + ext_type = fixed_shape_tensor(utf8(), {1}); + exact_ext_type = internal::checked_pointer_cast(ext_type); + scalar = std::make_shared(ArrayFromJSON(element_type, R"([["a", "b"]])")); + auto ext_scalar = std::make_shared(scalar, ext_type); + EXPECT_RAISES_WITH_MESSAGE_THAT( + TypeError, + testing::HasSubstr("Type error: Cannot convert non-fixed-width values to Tensor."), + exact_ext_type->MakeTensor(ext_scalar)); +} + } // namespace arrow diff --git a/cpp/src/arrow/extension/tensor_internal.h b/cpp/src/arrow/extension/tensor_internal.h new file mode 100644 index 0000000000000..069880cb17c85 --- /dev/null +++ b/cpp/src/arrow/extension/tensor_internal.h @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/status.h" +#include "arrow/util/print.h" + +namespace arrow::internal { + +ARROW_EXPORT +Status IsPermutationValid(const std::vector& permutation) { + const auto size = static_cast(permutation.size()); + std::vector dim_seen(size, 0); + + for (const auto p : permutation) { + if (p < 0 || p >= size || dim_seen[p] != 0) { + return Status::Invalid( + "Permutation indices for ", size, + " dimensional tensors must be unique and within [0, ", size - 1, + "] range. 
Got: ", ::arrow::internal::PrintVector{permutation, ","}); + } + dim_seen[p] = 1; + } + return Status::OK(); +} + +} // namespace arrow::internal diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 1029f3a629817..5ab07f21d5b71 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3541,7 +3541,7 @@ cdef class ExtensionArray(Array): return result -class FixedShapeTensorArray(ExtensionArray): +cdef class FixedShapeTensorArray(ExtensionArray): """ Concrete class for fixed shape tensor extension arrays. @@ -3582,17 +3582,48 @@ class FixedShapeTensorArray(ExtensionArray): def to_numpy_ndarray(self): """ - Convert fixed shape tensor extension array to a numpy array (with dim+1). + Convert fixed shape tensor extension array to a multi-dimensional numpy.ndarray. - Note: ``permutation`` should be trivial (``None`` or ``[0, 1, ..., len(shape)-1]``). + The resulting ndarray will have (ndim + 1) dimensions. + The size of the first dimension will be the length of the fixed shape tensor array + and the rest of the dimensions will match the permuted shape of the fixed + shape tensor. + + The conversion is zero-copy. + + Returns + ------- + numpy.ndarray + Ndarray representing tensors in the fixed shape tensor array concatenated + along the first dimension. """ - if self.type.permutation is None or self.type.permutation == list(range(len(self.type.shape))): - np_flat = np.asarray(self.storage.flatten()) - numpy_tensor = np_flat.reshape((len(self),) + tuple(self.type.shape)) - return numpy_tensor - else: - raise ValueError( - 'Only non-permuted tensors can be converted to numpy tensors.') + + return self.to_tensor().to_numpy() + + def to_tensor(self): + """ + Convert fixed shape tensor extension array to a pyarrow.Tensor. + + The resulting Tensor will have (ndim + 1) dimensions. + The size of the first dimension will be the length of the fixed shape tensor array + and the rest of the dimensions will match the permuted shape of the fixed + shape tensor. + + The conversion is zero-copy. + + Returns + ------- + pyarrow.Tensor + Tensor representing tensors in the fixed shape tensor array concatenated + along the first dimension. + """ + + cdef: + CFixedShapeTensorArray* ext_array = (self.ap) + CResult[shared_ptr[CTensor]] ctensor + with nogil: + ctensor = ext_array.ToTensor() + return pyarrow_wrap_tensor(GetResultValue(ctensor)) @staticmethod def from_numpy_ndarray(obj): @@ -3600,9 +3631,7 @@ class FixedShapeTensorArray(ExtensionArray): Convert numpy tensors (ndarrays) to a fixed shape tensor extension array. The first dimension of ndarray will become the length of the fixed shape tensor array. - - Numpy array needs to be C-contiguous in memory - (``obj.flags["C_CONTIGUOUS"]==True``). + If input array data is not contiguous a copy will be made. 
Parameters ---------- @@ -3636,17 +3665,25 @@ class FixedShapeTensorArray(ExtensionArray): ] ] """ - if not obj.flags["C_CONTIGUOUS"]: - raise ValueError('The data in the numpy array need to be in a single, ' - 'C-style contiguous segment.') + + if len(obj.shape) < 2: + raise ValueError( + "Cannot convert 1D array or scalar to fixed shape tensor array") + if np.prod(obj.shape) == 0: + raise ValueError("Expected a non-empty ndarray") + + permutation = (-np.array(obj.strides)).argsort(kind='stable') + if permutation[0] != 0: + raise ValueError('First stride needs to be largest to ensure that ' + 'individual tensor data is contiguous in memory.') arrow_type = from_numpy_dtype(obj.dtype) - shape = obj.shape[1:] - size = obj.size / obj.shape[0] + shape = np.take(obj.shape, permutation) + values = np.ravel(obj, order="K") return ExtensionArray.from_storage( - fixed_shape_tensor(arrow_type, shape), - FixedSizeListArray.from_arrays(np.ravel(obj, order='C'), size) + fixed_shape_tensor(arrow_type, shape[1:], permutation=permutation[1:] - 1), + FixedSizeListArray.from_arrays(values, shape[1:].prod()) ) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index d92f09da779b6..6149bee97236f 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2704,26 +2704,26 @@ cdef extern from "arrow/extension_type.h" namespace "arrow": shared_ptr[CArray] storage() -cdef extern from "arrow/extension/fixed_shape_tensor.h" namespace "arrow::extension": +cdef extern from "arrow/extension/fixed_shape_tensor.h" namespace "arrow::extension" nogil: cdef cppclass CFixedShapeTensorType \ " arrow::extension::FixedShapeTensorType"(CExtensionType): + CResult[shared_ptr[CTensor]] MakeTensor(const shared_ptr[CExtensionScalar]& scalar) const + @staticmethod CResult[shared_ptr[CDataType]] Make(const shared_ptr[CDataType]& value_type, const vector[int64_t]& shape, const vector[int64_t]& permutation, const vector[c_string]& dim_names) - CResult[shared_ptr[CDataType]] Deserialize(const shared_ptr[CDataType] storage_type, - const c_string& serialized_data) const - - c_string Serialize() const - const shared_ptr[CDataType] value_type() const vector[int64_t] shape() const vector[int64_t] permutation() const vector[c_string] dim_names() + cdef cppclass CFixedShapeTensorArray \ + " arrow::extension::FixedShapeTensorArray"(CExtensionArray): + const CResult[shared_ptr[CTensor]] ToTensor() const cdef extern from "arrow/util/compression.h" namespace "arrow" nogil: cdef enum CCompressionType" arrow::Compression::type": diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 2772acf81861c..80ca3ea84187e 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -1035,6 +1035,48 @@ cdef class ExtensionScalar(Scalar): return pyarrow_wrap_scalar( sp_scalar) +cdef class FixedShapeTensorScalar(ExtensionScalar): + """ + Concrete class for fixed shape tensor extension scalar. + """ + + def to_numpy(self): + """ + Convert fixed shape tensor scalar to a numpy.ndarray. + + The resulting ndarray's shape matches the permuted shape of the + fixed shape tensor scalar. + The conversion is zero-copy. + + Returns + ------- + numpy.ndarray + """ + return self.to_tensor().to_numpy() + + def to_tensor(self): + """ + Convert fixed shape tensor extension scalar to a pyarrow.Tensor, using shape + and strides derived from corresponding FixedShapeTensorType. + + The conversion is zero-copy. 
+ + Returns + ------- + pyarrow.Tensor + Tensor represented stored in FixedShapeTensorScalar. + """ + cdef: + CFixedShapeTensorType* c_type = static_pointer_cast[CFixedShapeTensorType, CDataType]( + self.wrapped.get().type).get() + shared_ptr[CExtensionScalar] scalar = static_pointer_cast[CExtensionScalar, CScalar](self.wrapped) + shared_ptr[CTensor] ctensor + + with nogil: + ctensor = GetResultValue(c_type.MakeTensor(scalar)) + return pyarrow_wrap_tensor(ctensor) + + cdef dict _scalar_classes = { _Type_BOOL: BooleanScalar, _Type_UINT8: UInt8Scalar, diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index d8c792ef00c6b..fe38bf651baae 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1318,39 +1318,120 @@ def test_tensor_type(): assert tensor_type.permutation is None -def test_tensor_class_methods(): - tensor_type = pa.fixed_shape_tensor(pa.float32(), [2, 3]) - storage = pa.array([[1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6]], - pa.list_(pa.float32(), 6)) +@pytest.mark.parametrize("value_type", (np.int8(), np.int64(), np.float32())) +def test_tensor_class_methods(value_type): + from numpy.lib.stride_tricks import as_strided + arrow_type = pa.from_numpy_dtype(value_type) + + tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 3]) + storage = pa.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], + pa.list_(arrow_type, 6)) arr = pa.ExtensionArray.from_storage(tensor_type, storage) expected = np.array( - [[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]], dtype=np.float32) - result = arr.to_numpy_ndarray() + [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=value_type) + np.testing.assert_array_equal(arr.to_tensor(), expected) + np.testing.assert_array_equal(arr.to_numpy_ndarray(), expected) + + expected = np.array([[[7, 8, 9], [10, 11, 12]]], dtype=value_type) + result = arr[1:].to_numpy_ndarray() np.testing.assert_array_equal(result, expected) - expected = np.array([[[1, 2, 3], [4, 5, 6]]], dtype=np.float32) - result = arr[:1].to_numpy_ndarray() + values = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]] + flat_arr = np.array(values[0], dtype=value_type) + bw = value_type.itemsize + storage = pa.array(values, pa.list_(arrow_type, 12)) + + tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[0, 1, 2]) + result = pa.ExtensionArray.from_storage(tensor_type, storage) + expected = np.array( + [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]], dtype=value_type) + np.testing.assert_array_equal(result.to_numpy_ndarray(), expected) + + result = flat_arr.reshape(1, 2, 3, 2) + expected = np.array( + [[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]], dtype=value_type) np.testing.assert_array_equal(result, expected) - arr = np.array( - [[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]], - dtype=np.float32, order="C") + tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[0, 2, 1]) + result = pa.ExtensionArray.from_storage(tensor_type, storage) + expected = as_strided(flat_arr, shape=(1, 2, 3, 2), + strides=(bw * 12, bw * 6, bw, bw * 3)) + np.testing.assert_array_equal(result.to_numpy_ndarray(), expected) + + tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[2, 0, 1]) + result = pa.ExtensionArray.from_storage(tensor_type, storage) + expected = as_strided(flat_arr, shape=(1, 3, 2, 2), + strides=(bw * 12, bw, bw * 6, bw * 2)) + np.testing.assert_array_equal(result.to_numpy_ndarray(), expected) + + assert result.type.permutation == [2, 
0, 1] + assert result.type.shape == [2, 2, 3] + assert result.to_tensor().shape == (1, 3, 2, 2) + assert result.to_tensor().strides == (12 * bw, 1 * bw, 6 * bw, 2 * bw) + + +@pytest.mark.parametrize("value_type", (np.int8(), np.int64(), np.float32())) +def test_tensor_array_from_numpy(value_type): + from numpy.lib.stride_tricks import as_strided + arrow_type = pa.from_numpy_dtype(value_type) + + arr = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], + dtype=value_type, order="C") tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr) assert isinstance(tensor_array_from_numpy.type, pa.FixedShapeTensorType) - assert tensor_array_from_numpy.type.value_type == pa.float32() + assert tensor_array_from_numpy.type.value_type == arrow_type assert tensor_array_from_numpy.type.shape == [2, 3] - arr = np.array( - [[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]], - dtype=np.float32, order="F") - with pytest.raises(ValueError, match="C-style contiguous segment"): + arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], + dtype=value_type, order="F") + with pytest.raises(ValueError, match="First stride needs to be largest"): pa.FixedShapeTensorArray.from_numpy_ndarray(arr) - tensor_type = pa.fixed_shape_tensor(pa.int8(), [2, 2, 3], permutation=[0, 2, 1]) - storage = pa.array([[1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]], pa.list_(pa.int8(), 12)) - arr = pa.ExtensionArray.from_storage(tensor_type, storage) - with pytest.raises(ValueError, match="non-permuted tensors"): - arr.to_numpy_ndarray() + flat_arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=value_type) + bw = value_type.itemsize + + arr = flat_arr.reshape(1, 3, 4) + tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr) + assert tensor_array_from_numpy.type.shape == [3, 4] + assert tensor_array_from_numpy.type.permutation == [0, 1] + assert tensor_array_from_numpy.to_tensor() == pa.Tensor.from_numpy(arr) + + arr = as_strided(flat_arr, shape=(1, 2, 3, 2), + strides=(bw * 12, bw * 6, bw, bw * 3)) + tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr) + assert tensor_array_from_numpy.type.shape == [2, 2, 3] + assert tensor_array_from_numpy.type.permutation == [0, 2, 1] + assert tensor_array_from_numpy.to_tensor() == pa.Tensor.from_numpy(arr) + + arr = flat_arr.reshape(1, 2, 3, 2) + result = pa.FixedShapeTensorArray.from_numpy_ndarray(arr) + expected = np.array( + [[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]], dtype=value_type) + np.testing.assert_array_equal(result.to_numpy_ndarray(), expected) + + arr = np.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], dtype=value_type) + expected = arr[1:] + result = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)[1:].to_numpy_ndarray() + np.testing.assert_array_equal(result, expected) + + arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=value_type) + with pytest.raises(ValueError, match="Cannot convert 1D array or scalar to fixed"): + pa.FixedShapeTensorArray.from_numpy_ndarray(arr) + + arr = np.array(1, dtype=value_type) + with pytest.raises(ValueError, match="Cannot convert 1D array or scalar to fixed"): + pa.FixedShapeTensorArray.from_numpy_ndarray(arr) + + arr = np.array([], dtype=value_type) + + with pytest.raises(ValueError, match="Cannot convert 1D array or scalar to fixed"): + pa.FixedShapeTensorArray.from_numpy_ndarray(arr.reshape((0))) + + with pytest.raises(ValueError, match="Expected a non-empty ndarray"): + pa.FixedShapeTensorArray.from_numpy_ndarray(arr.reshape((0, 3, 2))) + + with 
pytest.raises(ValueError, match="Expected a non-empty ndarray"): + pa.FixedShapeTensorArray.from_numpy_ndarray(arr.reshape((3, 0, 2))) @pytest.mark.parametrize("tensor_type", ( diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index ce3736b5af847..b9ba157a327a5 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1658,20 +1658,6 @@ cdef class FixedShapeTensorType(BaseExtensionType): else: return None - def __arrow_ext_serialize__(self): - """ - Serialized representation of metadata to reconstruct the type object. - """ - return self.tensor_ext_type.Serialize() - - @classmethod - def __arrow_ext_deserialize__(self, storage_type, serialized): - """ - Return an FixedShapeTensor type instance from the storage type and serialized - metadata. - """ - return self.tensor_ext_type.Deserialize(storage_type, serialized) - def __arrow_ext_class__(self): return FixedShapeTensorArray @@ -1679,6 +1665,9 @@ cdef class FixedShapeTensorType(BaseExtensionType): return fixed_shape_tensor, (self.value_type, self.shape, self.dim_names, self.permutation) + def __arrow_ext_scalar_class__(self): + return FixedShapeTensorScalar + _py_extension_type_auto_load = False @@ -4976,8 +4965,9 @@ def fixed_shape_tensor(DataType value_type, shape, dim_names=None, permutation=N cdef FixedShapeTensorType out = FixedShapeTensorType.__new__(FixedShapeTensorType) - c_tensor_ext_type = GetResultValue(CFixedShapeTensorType.Make( - value_type.sp_type, c_shape, c_permutation, c_dim_names)) + with nogil: + c_tensor_ext_type = GetResultValue(CFixedShapeTensorType.Make( + value_type.sp_type, c_shape, c_permutation, c_dim_names)) out.init(c_tensor_ext_type) From 42e35f101e87e689dcc48981abf81bc32c41d162 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 8 Feb 2024 09:44:19 -0500 Subject: [PATCH 67/74] GH-39812: [Python] Add bindings for ListView and LargeListView (#39813) ### Rationale for this change Add bindings to the ListView and LargeListView array formats. ### What changes are included in this PR? * Add initial implementation for ListView and LargeListView * Add basic unit tests ### Are these changes tested? * Basic unit tests only (follow up PRs will be needed to implement full functionality) ### Are there any user-facing changes? Yes, documentation is updated in this PR to include the new PyArrow objects. * Closes: #39812 Lead-authored-by: Dane Pitkin Co-authored-by: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Co-authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- docs/source/python/api/arrays.rst | 4 + docs/source/python/api/datatypes.rst | 4 + python/pyarrow/__init__.py | 14 +- python/pyarrow/array.pxi | 574 +++++++++++++++++++++++++++ python/pyarrow/includes/libarrow.pxd | 90 +++++ python/pyarrow/lib.pxd | 18 + python/pyarrow/lib.pyx | 2 + python/pyarrow/public-api.pxi | 4 + python/pyarrow/scalar.pxi | 10 + python/pyarrow/tests/test_array.py | 71 ++++ python/pyarrow/tests/test_misc.py | 4 + python/pyarrow/tests/test_scalars.py | 8 +- python/pyarrow/tests/test_types.py | 49 +++ python/pyarrow/types.pxi | 171 ++++++++ python/pyarrow/types.py | 10 + 15 files changed, 1027 insertions(+), 6 deletions(-) diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index b858862dcff01..e6f6c3dbbd3d1 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -77,6 +77,8 @@ may expose data type-specific methods or properties. 
ListArray FixedSizeListArray LargeListArray + ListViewArray + LargeListViewArray MapArray RunEndEncodedArray StructArray @@ -135,6 +137,8 @@ classes may expose data type-specific methods or properties. RunEndEncodedScalar ListScalar LargeListScalar + ListViewScalar + LargeListViewScalar MapScalar StructScalar UnionScalar diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst index 642c243b21af0..62bf4b7723558 100644 --- a/docs/source/python/api/datatypes.rst +++ b/docs/source/python/api/datatypes.rst @@ -60,6 +60,8 @@ These should be used to create Arrow data types and schemas. decimal128 list_ large_list + list_view + large_list_view map_ struct dictionary @@ -149,6 +151,8 @@ represents a given data type (such as ``int32``) or general category is_list is_large_list is_fixed_size_list + is_list_view + is_large_list_view is_struct is_union is_nested diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 4dbd1258d3cea..2ee97ddb662e5 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -166,7 +166,8 @@ def print_entry(label, value): binary, string, utf8, binary_view, string_view, large_binary, large_string, large_utf8, decimal128, decimal256, - list_, large_list, map_, struct, + list_, large_list, list_view, large_list_view, + map_, struct, union, sparse_union, dense_union, dictionary, run_end_encoded, @@ -174,8 +175,9 @@ def print_entry(label, value): field, type_for_alias, DataType, DictionaryType, StructType, - ListType, LargeListType, MapType, FixedSizeListType, - UnionType, SparseUnionType, DenseUnionType, + ListType, LargeListType, FixedSizeListType, + ListViewType, LargeListViewType, + MapType, UnionType, SparseUnionType, DenseUnionType, TimestampType, Time32Type, Time64Type, DurationType, FixedSizeBinaryType, Decimal128Type, Decimal256Type, BaseExtensionType, ExtensionType, @@ -201,8 +203,9 @@ def print_entry(label, value): Int32Array, UInt32Array, Int64Array, UInt64Array, HalfFloatArray, FloatArray, DoubleArray, - ListArray, LargeListArray, MapArray, - FixedSizeListArray, UnionArray, + ListArray, LargeListArray, FixedSizeListArray, + ListViewArray, LargeListViewArray, + MapArray, UnionArray, BinaryArray, StringArray, LargeBinaryArray, LargeStringArray, BinaryViewArray, StringViewArray, @@ -220,6 +223,7 @@ def print_entry(label, value): HalfFloatScalar, FloatScalar, DoubleScalar, Decimal128Scalar, Decimal256Scalar, ListScalar, LargeListScalar, FixedSizeListScalar, + ListViewScalar, LargeListViewScalar, Date32Scalar, Date64Scalar, Time32Scalar, Time64Scalar, TimestampScalar, DurationScalar, diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 5ab07f21d5b71..ad01d45571ba1 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2460,6 +2460,578 @@ cdef class LargeListArray(BaseListArray): return pyarrow_wrap_array(( self.ap).offsets()) +cdef class ListViewArray(Array): + """ + Concrete class for Arrow arrays of a list view data type. + """ + + @staticmethod + def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool pool=None, mask=None): + """ + Construct ListViewArray from arrays of int32 offsets, sizes, and values. + + Parameters + ---------- + offsets : Array (int32 type) + sizes : Array (int32 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). 
+ + Returns + ------- + list_view_array : ListViewArray + + Examples + -------- + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 1, 2]) + >>> sizes = pa.array([2, 2, 2]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 4 + ] + ] + >>> # use a null mask to represent null values + >>> mask = pa.array([False, True, False]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values, mask=mask) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + >>> # null values can be defined in either offsets or sizes arrays + >>> # WARNING: this will result in a copy of the offsets or sizes arrays + >>> offsets = pa.array([0, None, 2]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + """ + cdef: + Array _offsets, _sizes, _values + shared_ptr[CArray] out + shared_ptr[CBuffer] c_mask + CMemoryPool* cpool = maybe_unbox_memory_pool(pool) + + _offsets = asarray(offsets, type='int32') + _sizes = asarray(sizes, type='int32') + _values = asarray(values) + + c_mask = c_mask_inverted_from_obj(mask, pool) + + if type is not None: + with nogil: + out = GetResultValue( + CListViewArray.FromArraysAndType( + type.sp_type, _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool, c_mask)) + else: + with nogil: + out = GetResultValue( + CListViewArray.FromArrays( + _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool, c_mask)) + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + @property + def values(self): + """ + Return the underlying array of values which backs the ListViewArray + ignoring the array's offset and sizes. + + The values array may be out of order and/or contain additional values + that are not found in the logical representation of the array. The only + guarantee is that each non-null value in the ListView Array is contiguous. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's order and offset. + + Returns + ------- + values : Array + + Examples + -------- + The values include null elements from sub-lists: + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 1, + 2 + ], + [], + [ + 2, + null, + 3, + 4 + ] + ] + >>> array.values + + [ + 1, + 2, + null, + 3, + 4 + ] + """ + cdef CListViewArray* arr = self.ap + return pyarrow_wrap_array(arr.values()) + + @property + def offsets(self): + """ + Return the list offsets as an int32 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `ListViewArray.from_arrays` and get back the same + list array if the original one has nulls. + + Returns + ------- + offsets : Int32Array + + Examples + -------- + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array.offsets + + [ + 0, + 0, + 1 + ] + """ + return pyarrow_wrap_array(( self.ap).offsets()) + + @property + def sizes(self): + """ + Return the list sizes as an int32 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `ListViewArray.from_arrays` and get back the same + list array if the original one has nulls. 
+ + Returns + ------- + sizes : Int32Array + + Examples + -------- + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array.sizes + + [ + 2, + 0, + 4 + ] + """ + return pyarrow_wrap_array(( self.ap).sizes()) + + def flatten(self, memory_pool=None): + """ + Unnest this ListViewArray by one level. + + The returned Array is logically a concatenation of all the sub-lists + in this Array. + + Note that this method is different from ``self.values`` in that + it takes care of the slicing offset as well as null elements backed + by non-empty sub-lists. + + Parameters + ---------- + memory_pool : MemoryPool, optional + + Returns + ------- + result : Array + + Examples + -------- + + >>> import pyarrow as pa + >>> values = [1, 2, 3, 4] + >>> offsets = [2, 1, 0] + >>> sizes = [2, 2, 2] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 3, + 4 + ], + [ + 2, + 3 + ], + [ + 1, + 2 + ] + ] + >>> array.flatten() + + [ + 3, + 4, + 2, + 3, + 1, + 2 + ] + """ + cdef CMemoryPool* cpool = maybe_unbox_memory_pool(memory_pool) + with nogil: + out = GetResultValue(( self.ap).Flatten(cpool)) + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + +cdef class LargeListViewArray(Array): + """ + Concrete class for Arrow arrays of a large list view data type. + + Identical to ListViewArray, but with 64-bit offsets. + """ + @staticmethod + def from_arrays(offsets, sizes, values, DataType type=None, MemoryPool pool=None, mask=None): + """ + Construct LargeListViewArray from arrays of int64 offsets and values. + + Parameters + ---------- + offsets : Array (int64 type) + sizes : Array (int64 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). 
+ + Returns + ------- + list_view_array : LargeListViewArray + + Examples + -------- + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 1, 2]) + >>> sizes = pa.array([2, 2, 2]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 4 + ] + ] + >>> # use a null mask to represent null values + >>> mask = pa.array([False, True, False]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values, mask=mask) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + >>> # null values can be defined in either offsets or sizes arrays + >>> # WARNING: this will result in a copy of the offsets or sizes arrays + >>> offsets = pa.array([0, None, 2]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + """ + cdef: + Array _offsets, _sizes, _values + shared_ptr[CArray] out + shared_ptr[CBuffer] c_mask + CMemoryPool* cpool = maybe_unbox_memory_pool(pool) + + _offsets = asarray(offsets, type='int64') + _sizes = asarray(sizes, type='int64') + _values = asarray(values) + + c_mask = c_mask_inverted_from_obj(mask, pool) + + if type is not None: + with nogil: + out = GetResultValue( + CLargeListViewArray.FromArraysAndType( + type.sp_type, _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool, c_mask)) + else: + with nogil: + out = GetResultValue( + CLargeListViewArray.FromArrays( + _offsets.ap[0], _sizes.ap[0], _values.ap[0], cpool, c_mask)) + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + @property + def values(self): + """ + Return the underlying array of values which backs the LargeListArray + ignoring the array's offset. + + The values array may be out of order and/or contain additional values + that are not found in the logical representation of the array. The only + guarantee is that each non-null value in the ListView Array is contiguous. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's order and offset. + + Returns + ------- + values : Array + + See Also + -------- + LargeListArray.flatten : ... + + Examples + -------- + + The values include null elements from sub-lists: + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 1, + 2 + ], + [], + [ + 2, + null, + 3, + 4 + ] + ] + >>> array.values + + [ + 1, + 2, + null, + 3, + 4 + ] + """ + cdef CLargeListViewArray* arr = self.ap + return pyarrow_wrap_array(arr.values()) + + @property + def offsets(self): + """ + Return the list view offsets as an int64 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `LargeListViewArray.from_arrays` and get back the + same list array if the original one has nulls. + + Returns + ------- + offsets : Int64Array + + Examples + -------- + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array.offsets + + [ + 0, + 0, + 1 + ] + """ + return pyarrow_wrap_array(( self.ap).offsets()) + + @property + def sizes(self): + """ + Return the list view sizes as an int64 array. 
+ + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `LargeListViewArray.from_arrays` and get back the + same list array if the original one has nulls. + + Returns + ------- + sizes : Int64Array + + Examples + -------- + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array.sizes + + [ + 2, + 0, + 4 + ] + """ + return pyarrow_wrap_array(( self.ap).sizes()) + + def flatten(self, memory_pool=None): + """ + Unnest this LargeListViewArray by one level. + + The returned Array is logically a concatenation of all the sub-lists + in this Array. + + Note that this method is different from ``self.values`` in that + it takes care of the slicing offset as well as null elements backed + by non-empty sub-lists. + + Parameters + ---------- + memory_pool : MemoryPool, optional + + Returns + ------- + result : Array + + Examples + -------- + + >>> import pyarrow as pa + >>> values = [1, 2, 3, 4] + >>> offsets = [2, 1, 0] + >>> sizes = [2, 2, 2] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 3, + 4 + ], + [ + 2, + 3 + ], + [ + 1, + 2 + ] + ] + >>> array.flatten() + + [ + 3, + 4, + 2, + 3, + 1, + 2 + ] + """ + cdef CMemoryPool* cpool = maybe_unbox_memory_pool(memory_pool) + with nogil: + out = GetResultValue(( self.ap).Flatten(cpool)) + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + cdef class MapArray(ListArray): """ Concrete class for Arrow arrays of a map data type. @@ -3710,6 +4282,8 @@ cdef dict _array_classes = { _Type_DOUBLE: DoubleArray, _Type_LIST: ListArray, _Type_LARGE_LIST: LargeListArray, + _Type_LIST_VIEW: ListViewArray, + _Type_LARGE_LIST_VIEW: LargeListViewArray, _Type_MAP: MapArray, _Type_FIXED_SIZE_LIST: FixedSizeListArray, _Type_SPARSE_UNION: UnionArray, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 6149bee97236f..8056d99354965 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -132,6 +132,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: _Type_LIST" arrow::Type::LIST" _Type_LARGE_LIST" arrow::Type::LARGE_LIST" _Type_FIXED_SIZE_LIST" arrow::Type::FIXED_SIZE_LIST" + _Type_LIST_VIEW" arrow::Type::LIST_VIEW" + _Type_LARGE_LIST_VIEW" arrow::Type::LARGE_LIST_VIEW" _Type_STRUCT" arrow::Type::STRUCT" _Type_SPARSE_UNION" arrow::Type::SPARSE_UNION" _Type_DENSE_UNION" arrow::Type::DENSE_UNION" @@ -366,6 +368,18 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CDataType] value_type() shared_ptr[CField] value_field() + cdef cppclass CListViewType" arrow::ListViewType"(CDataType): + CListViewType(const shared_ptr[CDataType]& value_type) + CListViewType(const shared_ptr[CField]& field) + shared_ptr[CDataType] value_type() + shared_ptr[CField] value_field() + + cdef cppclass CLargeListViewType" arrow::LargeListViewType"(CDataType): + CLargeListViewType(const shared_ptr[CDataType]& value_type) + CLargeListViewType(const shared_ptr[CField]& field) + shared_ptr[CDataType] value_type() + shared_ptr[CField] value_field() + cdef cppclass CMapType" arrow::MapType"(CDataType): CMapType(const shared_ptr[CField]& key_field, const shared_ptr[CField]& item_field, c_bool keys_sorted) @@ -485,6 +499,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CDataType] run_end_type, shared_ptr[CDataType] value_type) + 
cdef shared_ptr[CDataType] CMakeListViewType" arrow::list_view"( + shared_ptr[CField] value_type) + + cdef shared_ptr[CDataType] CMakeLargeListViewType" arrow::large_list_view"( + shared_ptr[CField] value_type) + cdef cppclass CSchema" arrow::Schema": CSchema(const vector[shared_ptr[CField]]& fields) CSchema(const vector[shared_ptr[CField]]& fields, @@ -690,6 +710,70 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CArray] values() shared_ptr[CDataType] value_type() + cdef cppclass CListViewArray" arrow::ListViewArray"(CArray): + @staticmethod + CResult[shared_ptr[CArray]] FromArrays( + const CArray& offsets, + const CArray& sizes, + const CArray& values, + CMemoryPool* pool, + shared_ptr[CBuffer] null_bitmap, + ) + + @staticmethod + CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"( + shared_ptr[CDataType], + const CArray& offsets, + const CArray& sizes, + const CArray& values, + CMemoryPool* pool, + shared_ptr[CBuffer] null_bitmap, + ) + + CResult[shared_ptr[CArray]] Flatten( + CMemoryPool* pool + ) + + const int32_t* raw_value_offsets() + const int32_t* raw_value_sizes() + int32_t value_offset(int i) + int32_t value_length(int i) + shared_ptr[CArray] values() + shared_ptr[CArray] offsets() + shared_ptr[CArray] sizes() + shared_ptr[CDataType] value_type() + + cdef cppclass CLargeListViewArray" arrow::LargeListViewArray"(CArray): + @staticmethod + CResult[shared_ptr[CArray]] FromArrays( + const CArray& offsets, + const CArray& sizes, + const CArray& values, + CMemoryPool* pool, + shared_ptr[CBuffer] null_bitmap, + ) + + @staticmethod + CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"( + shared_ptr[CDataType], + const CArray& offsets, + const CArray& sizes, + const CArray& values, + CMemoryPool* pool, + shared_ptr[CBuffer] null_bitmap, + ) + + CResult[shared_ptr[CArray]] Flatten( + CMemoryPool* pool + ) + + int64_t value_offset(int i) + int64_t value_length(int i) + shared_ptr[CArray] values() + shared_ptr[CArray] offsets() + shared_ptr[CArray] sizes() + shared_ptr[CDataType] value_type() + cdef cppclass CMapArray" arrow::MapArray"(CArray): @staticmethod CResult[shared_ptr[CArray]] FromArrays( @@ -1150,6 +1234,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CListScalar" arrow::ListScalar"(CBaseListScalar): pass + cdef cppclass CListViewScalar" arrow::ListViewScalar"(CBaseListScalar): + pass + + cdef cppclass CLargeListViewScalar" arrow::LargeListViewScalar"(CBaseListScalar): + pass + cdef cppclass CMapScalar" arrow::MapScalar"(CListScalar): pass diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index c1104864066e9..48350212c2076 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -120,6 +120,16 @@ cdef class LargeListType(DataType): const CLargeListType* list_type +cdef class ListViewType(DataType): + cdef: + const CListViewType* list_view_type + + +cdef class LargeListViewType(DataType): + cdef: + const CLargeListViewType* list_view_type + + cdef class MapType(DataType): cdef: const CMapType* map_type @@ -425,6 +435,14 @@ cdef class LargeListArray(BaseListArray): pass +cdef class ListViewArray(Array): + pass + + +cdef class LargeListViewArray(Array): + pass + + cdef class MapArray(ListArray): pass diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index b0368b67f790e..3245e50f0fe69 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -110,6 +110,8 @@ Type_BINARY_VIEW = _Type_BINARY_VIEW Type_STRING_VIEW = _Type_STRING_VIEW Type_LIST = _Type_LIST Type_LARGE_LIST = 
_Type_LARGE_LIST +Type_LIST_VIEW = _Type_LIST_VIEW +Type_LARGE_LIST_VIEW = _Type_LARGE_LIST_VIEW Type_MAP = _Type_MAP Type_FIXED_SIZE_LIST = _Type_FIXED_SIZE_LIST Type_STRUCT = _Type_STRUCT diff --git a/python/pyarrow/public-api.pxi b/python/pyarrow/public-api.pxi index 72e16f2cec387..966273b4bea84 100644 --- a/python/pyarrow/public-api.pxi +++ b/python/pyarrow/public-api.pxi @@ -87,6 +87,10 @@ cdef api object pyarrow_wrap_data_type( out = ListType.__new__(ListType) elif type.get().id() == _Type_LARGE_LIST: out = LargeListType.__new__(LargeListType) + elif type.get().id() == _Type_LIST_VIEW: + out = ListViewType.__new__(ListViewType) + elif type.get().id() == _Type_LARGE_LIST_VIEW: + out = LargeListViewType.__new__(LargeListViewType) elif type.get().id() == _Type_MAP: out = MapType.__new__(MapType) elif type.get().id() == _Type_FIXED_SIZE_LIST: diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 80ca3ea84187e..41bfde39adb6f 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -720,6 +720,14 @@ cdef class LargeListScalar(ListScalar): pass +cdef class ListViewScalar(ListScalar): + pass + + +cdef class LargeListViewScalar(ListScalar): + pass + + cdef class StructScalar(Scalar, collections.abc.Mapping): """ Concrete class for struct scalars. @@ -1108,6 +1116,8 @@ cdef dict _scalar_classes = { _Type_LIST: ListScalar, _Type_LARGE_LIST: LargeListScalar, _Type_FIXED_SIZE_LIST: FixedSizeListScalar, + _Type_LIST_VIEW: ListViewScalar, + _Type_LARGE_LIST_VIEW: LargeListViewScalar, _Type_STRUCT: StructScalar, _Type_MAP: MapScalar, _Type_DICTIONARY: DictionaryScalar, diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index f851d4e0b6c29..bd9ae214b041e 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3573,3 +3573,74 @@ def test_run_end_encoded_from_buffers(): with pytest.raises(ValueError): pa.RunEndEncodedArray.from_buffers(ree_type, length, buffers, 1, offset, children) + + +@pytest.mark.parametrize(('list_array_type'), + [pa.ListViewArray, pa.LargeListViewArray]) +def test_list_view_from_arrays(list_array_type): + # test in order offsets, similar to ListArray representation + values = [1, 2, 3, 4, 5, 6, None, 7] + offsets = [0, 2, 4, 6] + sizes = [2, 2, 2, 2] + array = list_array_type.from_arrays(offsets, sizes, values) + + assert array.to_pylist() == [[1, 2], [3, 4], [5, 6], [None, 7]] + assert array.values.to_pylist() == values + assert array.offsets.to_pylist() == offsets + assert array.sizes.to_pylist() == sizes + + # test out of order offsets with overlapping values + values = [1, 2, 3, 4] + offsets = [2, 1, 0] + sizes = [2, 2, 2] + array = list_array_type.from_arrays(offsets, sizes, values) + + assert array.to_pylist() == [[3, 4], [2, 3], [1, 2]] + assert array.values.to_pylist() == values + assert array.offsets.to_pylist() == offsets + assert array.sizes.to_pylist() == sizes + + # test null offsets and empty list values + values = [] + offsets = [0, None] + sizes = [0, 0] + array = list_array_type.from_arrays(offsets, sizes, values) + + assert array.to_pylist() == [[], None] + assert array.values.to_pylist() == values + assert array.offsets.to_pylist() == [0, 0] + assert array.sizes.to_pylist() == sizes + + # test null sizes and empty list values + values = [] + offsets = [0, 0] + sizes = [None, 0] + array = list_array_type.from_arrays(offsets, sizes, values) + + assert array.to_pylist() == [None, []] + assert array.values.to_pylist() == values + assert 
array.offsets.to_pylist() == offsets + assert array.sizes.to_pylist() == [0, 0] + + # test null bitmask + values = [1, 2] + offsets = [0, 0, 1] + sizes = [1, 0, 1] + mask = pa.array([False, True, False]) + array = list_array_type.from_arrays(offsets, sizes, values, mask=mask) + + assert array.to_pylist() == [[1], None, [2]] + assert array.values.to_pylist() == values + assert array.offsets.to_pylist() == offsets + assert array.sizes.to_pylist() == sizes + + +@pytest.mark.parametrize(('list_array_type'), + [pa.ListViewArray, pa.LargeListViewArray]) +def test_list_view_flatten(list_array_type): + values = [1, 2, 3, 4] + offsets = [3, 2, 1, 0] + sizes = [1, 1, 1, 1] + array = list_array_type.from_arrays(offsets, sizes, values) + + assert array.flatten().to_pylist() == [4, 3, 2, 1] diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 8cec8783280dd..39dac4eb81dfb 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -154,6 +154,8 @@ def test_set_timezone_db_path_non_windows(): pa.ListType, pa.LargeListType, pa.FixedSizeListType, + pa.ListViewType, + pa.LargeListViewType, pa.UnionType, pa.SparseUnionType, pa.DenseUnionType, @@ -227,6 +229,8 @@ def test_set_timezone_db_path_non_windows(): pa.StringViewScalar, pa.ListScalar, pa.LargeListScalar, + pa.ListViewScalar, + pa.LargeListViewScalar, pa.MapScalar, pa.FixedSizeListScalar, pa.UnionScalar, diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index eed5f045be945..074fb757e265a 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -57,6 +57,9 @@ ([1, 2, 3], None, pa.ListScalar), ([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar), ([1, 2, 3, 4, 5], pa.list_(pa.int8(), 5), pa.FixedSizeListScalar), + # TODO GH-39855 + # ([1, 2, 3], pa.list_view(pa.int8()), pa.ListViewScalar), + # ([1, 2, 3, 4], pa.large_list_view(pa.int8()), pa.LargeListViewScalar), (datetime.date.today(), None, pa.Date32Scalar), (datetime.date.today(), pa.date64(), pa.Date64Scalar), (datetime.datetime.now(), None, pa.TimestampScalar), @@ -537,7 +540,10 @@ def test_fixed_size_binary(): @pytest.mark.parametrize(('ty', 'klass'), [ (pa.list_(pa.string()), pa.ListScalar), - (pa.large_list(pa.string()), pa.LargeListScalar) + (pa.large_list(pa.string()), pa.LargeListScalar), + # TODO GH-39855 + # (pa.list_view(pa.string()), pa.ListViewScalar), + # (pa.large_list_view(pa.string()), pa.LargeListViewScalar) ]) def test_list(ty, klass): v = ['foo', None] diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index a5ab3128dc874..0add5786088d3 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -66,6 +66,8 @@ def get_many_types(): pa.list_(pa.int32()), pa.list_(pa.int32(), 2), pa.large_list(pa.uint16()), + pa.list_view(pa.int32()), + pa.large_list_view(pa.uint16()), pa.map_(pa.string(), pa.int32()), pa.map_(pa.field('key', pa.int32(), nullable=False), pa.field('value', pa.int32())), @@ -169,6 +171,18 @@ def test_is_list(): assert not types.is_list(pa.int32()) +def test_is_list_view(): + a = pa.list_view(pa.int32()) + b = pa.large_list_view(pa.int32()) + + assert types.is_list_view(a) + assert not types.is_large_list_view(a) + assert not types.is_list(a) + assert types.is_large_list_view(b) + assert not types.is_list_view(b) + assert not types.is_large_list(b) + + def test_is_map(): m = pa.map_(pa.utf8(), pa.int32()) @@ -573,6 +587,41 @@ def test_large_list_type(): 
pa.large_list(None) +def test_list_view_type(): + ty = pa.list_view(pa.int64()) + assert isinstance(ty, pa.ListViewType) + assert ty.value_type == pa.int64() + assert ty.value_field == pa.field("item", pa.int64(), nullable=True) + + # nullability matters in comparison + ty_non_nullable = pa.list_view(pa.field("item", pa.int64(), nullable=False)) + assert ty != ty_non_nullable + + # field names don't matter by default + ty_named = pa.list_view(pa.field("element", pa.int64())) + assert ty == ty_named + assert not ty.equals(ty_named, check_metadata=True) + + # metadata doesn't matter by default + ty_metadata = pa.list_view( + pa.field("item", pa.int64(), metadata={"hello": "world"})) + assert ty == ty_metadata + assert not ty.equals(ty_metadata, check_metadata=True) + + with pytest.raises(TypeError): + pa.list_view(None) + + +def test_large_list_view_type(): + ty = pa.large_list_view(pa.utf8()) + assert isinstance(ty, pa.LargeListViewType) + assert ty.value_type == pa.utf8() + assert ty.value_field == pa.field("item", pa.utf8(), nullable=True) + + with pytest.raises(TypeError): + pa.large_list_view(None) + + def test_map_type(): ty = pa.map_(pa.utf8(), pa.int32()) assert isinstance(ty, pa.MapType) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index b9ba157a327a5..50b10c5512dc1 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -557,6 +557,101 @@ cdef class LargeListType(DataType): return pyarrow_wrap_data_type(self.list_type.value_type()) +cdef class ListViewType(DataType): + """ + Concrete class for list view data types. + + Examples + -------- + Create an instance of ListViewType: + + >>> import pyarrow as pa + >>> pa.list_view(pa.string()) + ListViewType(list_view) + """ + + cdef void init(self, const shared_ptr[CDataType]& type) except *: + DataType.init(self, type) + self.list_view_type = type.get() + + def __reduce__(self): + return list_view, (self.value_field,) + + @property + def value_field(self): + """ + The field for list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_view(pa.string()).value_field + pyarrow.Field + """ + return pyarrow_wrap_field(self.list_view_type.value_field()) + + @property + def value_type(self): + """ + The data type of list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_view(pa.string()).value_type + DataType(string) + """ + return pyarrow_wrap_data_type(self.list_view_type.value_type()) + + +cdef class LargeListViewType(DataType): + """ + Concrete class for large list view data types + (like ListViewType, but with 64-bit offsets). + + Examples + -------- + Create an instance of LargeListViewType: + + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()) + LargeListViewType(large_list_view) + """ + + cdef void init(self, const shared_ptr[CDataType]& type) except *: + DataType.init(self, type) + self.list_view_type = type.get() + + def __reduce__(self): + return large_list_view, (self.value_field,) + + @property + def value_field(self): + """ + The field for large list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()).value_field + pyarrow.Field + """ + return pyarrow_wrap_field(self.list_view_type.value_field()) + + @property + def value_type(self): + """ + The data type of large list view values. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()).value_type + DataType(string) + """ + return pyarrow_wrap_data_type(self.list_view_type.value_type()) + + cdef class MapType(DataType): """ Concrete class for map data types. @@ -4517,6 +4612,82 @@ cpdef LargeListType large_list(value_type): return out +cpdef ListViewType list_view(value_type): + """ + Create ListViewType instance from child data type or field. + + This data type may not be supported by all Arrow implementations + because it is an alternative to the ListType. + + Parameters + ---------- + value_type : DataType or Field + + Returns + ------- + list_view_type : DataType + + Examples + -------- + Create an instance of ListViewType: + + >>> import pyarrow as pa + >>> pa.list_view(pa.string()) + ListViewType(list_view) + """ + cdef: + Field _field + shared_ptr[CDataType] list_view_type + + if isinstance(value_type, DataType): + _field = field('item', value_type) + elif isinstance(value_type, Field): + _field = value_type + else: + raise TypeError('ListView requires DataType or Field') + + list_view_type = CMakeListViewType(_field.sp_field) + return pyarrow_wrap_data_type(list_view_type) + + +cpdef LargeListViewType large_list_view(value_type): + """ + Create LargeListViewType instance from child data type or field. + + This data type may not be supported by all Arrow implementations + because it is an alternative to the ListType. + + Parameters + ---------- + value_type : DataType or Field + + Returns + ------- + list_view_type : DataType + + Examples + -------- + Create an instance of LargeListViewType: + + >>> import pyarrow as pa + >>> pa.large_list_view(pa.int8()) + LargeListViewType(large_list_view) + """ + cdef: + Field _field + shared_ptr[CDataType] list_view_type + + if isinstance(value_type, DataType): + _field = field('item', value_type) + elif isinstance(value_type, Field): + _field = value_type + else: + raise TypeError('LargeListView requires DataType or Field') + + list_view_type = CMakeLargeListViewType(_field.sp_field) + return pyarrow_wrap_data_type(list_view_type) + + cpdef MapType map_(key_type, item_type, keys_sorted=False): """ Create MapType instance from key and item data types or fields. diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py index 32398dac9c5f5..0f68ca9fe574b 100644 --- a/python/pyarrow/types.py +++ b/python/pyarrow/types.py @@ -151,6 +151,16 @@ def is_fixed_size_list(t): return t.id == lib.Type_FIXED_SIZE_LIST +@doc(is_null, datatype="list view") +def is_list_view(t): + return t.id == lib.Type_LIST_VIEW + + +@doc(is_null, datatype="large list view") +def is_large_list_view(t): + return t.id == lib.Type_LARGE_LIST_VIEW + + @doc(is_null, datatype="struct") def is_struct(t): return t.id == lib.Type_STRUCT From 30f6fdbbd32b77f0351c08732c395bbd28af2850 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 8 Feb 2024 16:28:25 +0100 Subject: [PATCH 68/74] GH-39996: [Archery] Fix Crossbow build on a PR from a fork's main branch (#39997) ### Rationale for this change ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? 
* Closes: #39996 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- dev/archery/archery/bot.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/dev/archery/archery/bot.py b/dev/archery/archery/bot.py index 4e5104362254c..caab824aeb38f 100644 --- a/dev/archery/archery/bot.py +++ b/dev/archery/archery/bot.py @@ -324,7 +324,8 @@ def crossbow(obj, crossbow): obj['crossbow_repo'] = crossbow -def _clone_arrow_and_crossbow(dest, crossbow_repo, pull_request): +def _clone_arrow_and_crossbow(dest, crossbow_repo, arrow_repo_url, + pr_number, pr_branch): """ Clone the repositories and initialize crossbow objects. @@ -338,22 +339,25 @@ def _clone_arrow_and_crossbow(dest, crossbow_repo, pull_request): Object containing information about the pull request the comment bot was triggered from. """ + bare_arrow_path = dest / 'arrow_bare' arrow_path = dest / 'arrow' queue_path = dest / 'crossbow' - # clone arrow and checkout the pull request's branch - pull_request_ref = 'pull/{}/head:{}'.format( - pull_request.number, pull_request.head.ref - ) - git.clone(pull_request.base.repo.clone_url, str(arrow_path)) - git.fetch('origin', pull_request_ref, git_dir=arrow_path) - git.checkout(pull_request.head.ref, git_dir=arrow_path) - - # clone crossbow repository + # 1. clone arrow and checkout the PR's branch + pr_ref = f'pull/{pr_number}/head:{pr_branch}' + # we do a bare clone of upstream arrow to avoid issues when the PR is + # submitted from a fork's main branch (GH-39996) + git.clone('--bare', arrow_repo_url, str(bare_arrow_path)) + # fetch the PR's branch into the bare clone + git.fetch('origin', pr_ref, git_dir=bare_arrow_path) + # clone and checkout the PR's branch into a full local repo + git.clone(f'--branch={pr_branch}', bare_arrow_path, arrow_path) + + # 2. clone crossbow repository crossbow_url = 'https://github.com/{}'.format(crossbow_repo) git.clone(crossbow_url, str(queue_path)) - # initialize crossbow objects + # 3. initialize crossbow objects github_token = os.environ['CROSSBOW_GITHUB_TOKEN'] arrow = Repo(arrow_path) queue = Queue(queue_path, github_token=github_token, require_https=True) @@ -385,7 +389,9 @@ def submit(obj, tasks, groups, params, arrow_version, wait): arrow, queue = _clone_arrow_and_crossbow( dest=Path(tmpdir), crossbow_repo=crossbow_repo, - pull_request=pull_request, + arrow_repo_url=pull_request.base.repo.clone_url, + pr_number=pull_request.number, + pr_branch=pull_request.head.ref, ) # load available tasks configuration and groups from yaml config = Config.load_yaml(arrow.path / "dev" / "tasks" / "tasks.yml") From 98c422557cfe33e714ea009a270aab98818e2748 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Fri, 9 Feb 2024 00:05:50 +0800 Subject: [PATCH 69/74] GH-39976: [C++] Fix out-of-line data size calculation in BinaryViewBuilder::AppendArraySlice (#39994) ### Rationale for this change Fix the bug in `BinaryViewBuilder::AppendArraySlice` that, when calculating out-of-line data size, the array is wrongly iterated. ### What changes are included in this PR? Fix and UT. ### Are these changes tested? UT included. ### Are there any user-facing changes? No. 
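For readers less familiar with Arrow's slice semantics, a small pyarrow illustration of the offset bookkeeping involved; the fix itself lives in the C++ `BinaryViewBuilder` and is not directly reachable from Python, and a plain string array is used here for simplicity.

```python
# Sketch of why a slice's offset must be added when walking the parent array's buffers.
import pyarrow as pa

arr = pa.array([None, "long string; not inlined"])
sliced = arr.slice(1, 1)   # zero-copy: shares arr's buffers, starting at logical offset 1

print(sliced.offset)       # 1 -> readers must add this offset into the shared buffers
print(sliced.null_count)   # 0 -> the null at index 0 of `arr` lies outside the slice
```

The bug fixed here was of exactly this kind: the validity bitmap was visited from `array.offset` over the whole array instead of from `array.offset + offset` over the requested `length`.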
* Closes: #39976 Authored-by: Ruoxi Sun Signed-off-by: Antoine Pitrou --- cpp/src/arrow/array/array_test.cc | 23 +++++++++++++++++++++++ cpp/src/arrow/array/builder_binary.cc | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index e9d478f108584..21ac1a09f56e7 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -905,6 +905,29 @@ TEST_F(TestArray, TestAppendArraySlice) { } } +// GH-39976: Test out-of-line data size calculation in +// BinaryViewBuilder::AppendArraySlice. +TEST_F(TestArray, TestBinaryViewAppendArraySlice) { + BinaryViewBuilder src_builder(pool_); + ASSERT_OK(src_builder.AppendNull()); + ASSERT_OK(src_builder.Append("long string; not inlined")); + ASSERT_EQ(2, src_builder.length()); + ASSERT_OK_AND_ASSIGN(auto src, src_builder.Finish()); + ASSERT_OK(src->ValidateFull()); + + ArraySpan span; + span.SetMembers(*src->data()); + BinaryViewBuilder dst_builder(pool_); + ASSERT_OK(dst_builder.AppendArraySlice(span, 0, 1)); + ASSERT_EQ(1, dst_builder.length()); + ASSERT_OK(dst_builder.AppendArraySlice(span, 1, 1)); + ASSERT_EQ(2, dst_builder.length()); + ASSERT_OK_AND_ASSIGN(auto dst, dst_builder.Finish()); + ASSERT_OK(dst->ValidateFull()); + + AssertArraysEqual(*src, *dst); +} + TEST_F(TestArray, ValidateBuffersPrimitive) { auto empty_buffer = std::make_shared(""); auto null_buffer = Buffer::FromString("\xff"); diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc index f85852fa0eda6..7e5721917f3a0 100644 --- a/cpp/src/arrow/array/builder_binary.cc +++ b/cpp/src/arrow/array/builder_binary.cc @@ -54,7 +54,7 @@ Status BinaryViewBuilder::AppendArraySlice(const ArraySpan& array, int64_t offse int64_t out_of_line_total = 0, i = 0; VisitNullBitmapInline( - array.buffers[0].data, array.offset, array.length, array.null_count, + array.buffers[0].data, array.offset + offset, length, array.null_count, [&] { if (!values[i].is_inline()) { out_of_line_total += static_cast(values[i].size()); From a946214b127ff50ea0cf7e68946c186fa66009a2 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Fri, 9 Feb 2024 01:02:04 +0800 Subject: [PATCH 70/74] GH-39973: [C++][CI] Disable debug memory pool for ASAN and Valgrind (#39975) ### Rationale for this change Disable debug memory pool for ASAN and Valgrind so that they can detect more subtle memory issues regarding to buffer tail bytes. ### What changes are included in this PR? 1. Add a `none` option to debug memory pool env var to make other things slightly easier. 2. Change `*_test.sh` scripts to conditionally set debug memory pool env var. 3. Top-level docker compose change to pass none to debug memory pool env var for ASAN and Valgrind. ### Are these changes tested? The CI should cover it well. ### Are there any user-facing changes? No. 
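As a usage note (not part of the patch): with the new `none` value, a run that must not use the debug memory pool can opt out explicitly. A minimal Python sketch, assuming a pyarrow build that honors `ARROW_DEBUG_MEMORY_POOL`; the variable has to be set before Arrow initializes its memory pools, so it is set here before importing pyarrow.

```python
# Minimal sketch: explicitly disable the debug memory pool via the new "none" value.
import os
os.environ["ARROW_DEBUG_MEMORY_POOL"] = "none"   # must happen before the pools are created

import pyarrow as pa

buf = pa.allocate_buffer(64)                     # allocated without debug-pool guard checks
print(pa.default_memory_pool().backend_name, buf.size)
```

The CI scripts changed below do the equivalent at the shell level, defaulting to `trap` only when the variable is not already set.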
* Closes: #39973 Authored-by: Ruoxi Sun Signed-off-by: Antoine Pitrou --- ci/appveyor-cpp-build.bat | 5 ++++- ci/scripts/c_glib_test.sh | 6 ++++-- ci/scripts/cpp_test.sh | 6 ++++-- ci/scripts/python_test.sh | 6 ++++-- ci/scripts/r_test.sh | 6 ++++-- ci/scripts/ruby_test.sh | 6 ++++-- cpp/src/arrow/memory_pool.cc | 4 ++-- docker-compose.yml | 4 ++++ docs/source/cpp/env_vars.rst | 4 +++- python/pyarrow/tests/test_memory.py | 30 +++++++++++++++++++++++++---- 10 files changed, 59 insertions(+), 18 deletions(-) diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 5e561a0461ea3..ab85032fe9924 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -26,7 +26,10 @@ git submodule update --init || exit /B set ARROW_TEST_DATA=%CD%\testing\data set PARQUET_TEST_DATA=%CD%\cpp\submodules\parquet-testing\data -set ARROW_DEBUG_MEMORY_POOL=trap +@rem Enable memory debug checks if the env is not set already +IF "%ARROW_DEBUG_MEMORY_POOL%"=="" ( + set ARROW_DEBUG_MEMORY_POOL=trap +) set CMAKE_BUILD_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS% set CTEST_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS% diff --git a/ci/scripts/c_glib_test.sh b/ci/scripts/c_glib_test.sh index cea600191ae05..f8083c7759d8a 100755 --- a/ci/scripts/c_glib_test.sh +++ b/ci/scripts/c_glib_test.sh @@ -28,8 +28,10 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 -# Enable memory debug checks. -export ARROW_DEBUG_MEMORY_POOL=trap +# Enable memory debug checks if the env is not set already +if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then + export ARROW_DEBUG_MEMORY_POOL=trap +fi pushd ${source_dir} diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 0c6e1c6ef7057..1d685c51a9326 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -37,8 +37,10 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}:${LD_LIBRARY_P # to retrieve metadata. Disable this so that S3FileSystem tests run faster. export AWS_EC2_METADATA_DISABLED=TRUE -# Enable memory debug checks. -export ARROW_DEBUG_MEMORY_POOL=trap +# Enable memory debug checks if the env is not set already +if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then + export ARROW_DEBUG_MEMORY_POOL=trap +fi ctest_options=() case "$(uname)" in diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 341c2dd0577ef..8dfedb2880b50 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -32,8 +32,10 @@ export ARROW_GDB_SCRIPT=${arrow_dir}/cpp/gdb_arrow.py # Enable some checks inside Python itself export PYTHONDEVMODE=1 -# Enable memory debug checks. -export ARROW_DEBUG_MEMORY_POOL=trap +# Enable memory debug checks if the env is not set already +if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then + export ARROW_DEBUG_MEMORY_POOL=trap +fi # By default, force-test all optional components : ${PYARROW_TEST_ACERO:=${ARROW_ACERO:-ON}} diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index 22ec551edb9fa..72078ab3c06c2 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -72,8 +72,10 @@ export _R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_=TRUE # to retrieve metadata. Disable this so that S3FileSystem tests run faster. export AWS_EC2_METADATA_DISABLED=TRUE -# Enable memory debug checks. 
-export ARROW_DEBUG_MEMORY_POOL=trap +# Enable memory debug checks if the env is not set already +if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then + export ARROW_DEBUG_MEMORY_POOL=trap +fi # Hack so that texlive2020 doesn't pollute the home dir export TEXMFCONFIG=/tmp/texmf-config diff --git a/ci/scripts/ruby_test.sh b/ci/scripts/ruby_test.sh index 4fd6a85fe3966..56c33a4d6378a 100755 --- a/ci/scripts/ruby_test.sh +++ b/ci/scripts/ruby_test.sh @@ -26,7 +26,9 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 -# Enable memory debug checks. -export ARROW_DEBUG_MEMORY_POOL=trap +# Enable memory debug checks if the env is not set already +if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then + export ARROW_DEBUG_MEMORY_POOL=trap +fi rake -f ${source_dir}/Rakefile BUILD_DIR=${build_dir} USE_BUNDLER=yes diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 843329c17bc28..d58c203d2ae27 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -195,7 +195,7 @@ bool IsDebugEnabled() { return false; } auto env_value = *std::move(maybe_env_value); - if (env_value.empty()) { + if (env_value.empty() || env_value == "none") { return false; } auto debug_state = DebugState::Instance(); @@ -212,7 +212,7 @@ bool IsDebugEnabled() { return true; } ARROW_LOG(WARNING) << "Invalid value for " << kDebugMemoryEnvVar << ": '" << env_value - << "'. Valid values are 'abort', 'trap', 'warn'."; + << "'. Valid values are 'abort', 'trap', 'warn', 'none'."; return false; }(); diff --git a/docker-compose.yml b/docker-compose.yml index a31fa0d9aa659..7ae625a017417 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -320,6 +320,8 @@ services: # Shrink test runtime by enabling minimal optimizations ARROW_C_FLAGS_DEBUG: "-g1 -Og" ARROW_CXX_FLAGS_DEBUG: "-g1 -Og" + # GH-39973: Do not use debug memory pool for valgrind + ARROW_DEBUG_MEMORY_POOL: "none" ARROW_ENABLE_TIMING_TESTS: # inherit ARROW_FLIGHT: "OFF" ARROW_FLIGHT_SQL: "OFF" @@ -598,6 +600,8 @@ services: CXX: clang++-${CLANG_TOOLS} # Avoid creating huge static libraries ARROW_BUILD_STATIC: "OFF" + # GH-39973: Do not use debug memory pool for ASAN + ARROW_DEBUG_MEMORY_POOL: "none" ARROW_ENABLE_TIMING_TESTS: # inherit # GH-33920: Disable Flight SQL to reduce build time. # We'll be able to re-enable this with Ubuntu 24.04 because diff --git a/docs/source/cpp/env_vars.rst b/docs/source/cpp/env_vars.rst index 0fa80aa1106c1..eb7c797df5e27 100644 --- a/docs/source/cpp/env_vars.rst +++ b/docs/source/cpp/env_vars.rst @@ -58,8 +58,10 @@ that changing their value later will have an effect. - ``abort`` exits the processus with a non-zero return value; - ``trap`` issues a platform-specific debugger breakpoint / trap instruction; - ``warn`` prints a warning on stderr and continues execution; + - ``none`` disables memory checks; - If this variable is not set, or has empty an value, memory checks are disabled. + If this variable is not set, or has an empty value, it has the same effect + as the value ``none`` - memory checks are disabled. .. 
note:: While this functionality can be useful and has little overhead, it diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py index d9fdeb152c35e..4f199952344f2 100644 --- a/python/pyarrow/tests/test_memory.py +++ b/python/pyarrow/tests/test_memory.py @@ -243,13 +243,35 @@ def test_debug_memory_pool_warn(pool_factory): assert "Wrong size on deallocation" in res.stderr -@pytest.mark.parametrize('pool_factory', supported_factories()) -def test_debug_memory_pool_disabled(pool_factory): - res = run_debug_memory_pool(pool_factory.__name__, "") +def check_debug_memory_pool_disabled(pool_factory, env_value, msg): + res = run_debug_memory_pool(pool_factory.__name__, env_value) # The subprocess either returned successfully or was killed by a signal # (due to writing out of bounds), depending on the underlying allocator. if os.name == "posix": assert res.returncode <= 0 else: res.check_returncode() - assert res.stderr == "" + if msg == "": + assert res.stderr == "" + else: + assert msg in res.stderr + + +@pytest.mark.parametrize('pool_factory', supported_factories()) +def test_debug_memory_pool_none(pool_factory): + check_debug_memory_pool_disabled(pool_factory, "none", "") + + +@pytest.mark.parametrize('pool_factory', supported_factories()) +def test_debug_memory_pool_empty(pool_factory): + check_debug_memory_pool_disabled(pool_factory, "", "") + + +@pytest.mark.parametrize('pool_factory', supported_factories()) +def test_debug_memory_pool_unknown(pool_factory): + env_value = "some_arbitrary_value" + msg = ( + f"Invalid value for ARROW_DEBUG_MEMORY_POOL: '{env_value}'. " + "Valid values are 'abort', 'trap', 'warn', 'none'." + ) + check_debug_memory_pool_disabled(pool_factory, env_value, msg) From de3cdc00c21fd3e9d8d763099591f23720ca8d1f Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 8 Feb 2024 18:45:40 +0100 Subject: [PATCH 71/74] GH-39962: [C++] Small CSV reader refactoring (#39963) ### What changes are included in this PR? Factor our some shared functionality in the CSV reader to avoid code duplication. ### Are these changes tested? Yes, by existing tests. ### Are there any user-facing changes? No. * Closes: #39962 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/csv/reader.cc | 144 +++++++++++------------------------- 1 file changed, 45 insertions(+), 99 deletions(-) diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index 1ac25e290a814..e981fafe8e780 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -445,16 +445,20 @@ class BlockParsingOperator { num_rows_seen_ += parser->total_num_rows(); } - RETURN_NOT_OK(block.consume_bytes(parsed_size)); + if (block.consume_bytes) { + RETURN_NOT_OK(block.consume_bytes(parsed_size)); + } return ParsedBlock{std::move(parser), block.block_index, static_cast(parsed_size) + block.bytes_skipped}; } + int num_csv_cols() const { return num_csv_cols_; } + private: io::IOContext io_context_; - ParseOptions parse_options_; - int num_csv_cols_; - bool count_rows_; + const ParseOptions parse_options_; + const int num_csv_cols_; + const bool count_rows_; int64_t num_rows_seen_; }; @@ -570,7 +574,6 @@ class ReaderMixin { parse_options_(parse_options), convert_options_(convert_options), count_rows_(count_rows), - num_rows_seen_(count_rows_ ? 
1 : -1), input_(std::move(input)) {} protected: @@ -581,6 +584,7 @@ class ReaderMixin { const uint8_t* data = buf->data(); const auto data_end = data + buf->size(); DCHECK_GT(data_end - data, 0); + int64_t num_rows_seen = 1; if (read_options_.skip_rows) { // Skip initial rows (potentially invalid CSV data) @@ -593,14 +597,14 @@ class ReaderMixin { "either file is too short or header is larger than block size"); } if (count_rows_) { - num_rows_seen_ += num_skipped_rows; + num_rows_seen += num_skipped_rows; } } if (read_options_.column_names.empty()) { // Parse one row (either to read column names or to know the number of columns) - BlockParser parser(io_context_.pool(), parse_options_, num_csv_cols_, - num_rows_seen_, 1); + BlockParser parser(io_context_.pool(), parse_options_, /*num_cols=*/-1, + /*first_row=*/num_rows_seen, /*max_num_rows=*/1); uint32_t parsed_size = 0; RETURN_NOT_OK(parser.Parse( std::string_view(reinterpret_cast(data), data_end - data), @@ -627,7 +631,7 @@ class ReaderMixin { // Skip parsed header row data += parsed_size; if (count_rows_) { - ++num_rows_seen_; + ++num_rows_seen; } } } else { @@ -636,14 +640,17 @@ class ReaderMixin { if (count_rows_) { // increase rows seen to skip past rows which will be skipped - num_rows_seen_ += read_options_.skip_rows_after_names; + num_rows_seen += read_options_.skip_rows_after_names; } auto bytes_consumed = data - buf->data(); *rest = SliceBuffer(buf, bytes_consumed); - num_csv_cols_ = static_cast(column_names_.size()); - DCHECK_GT(num_csv_cols_, 0); + int32_t num_csv_cols = static_cast(column_names_.size()); + DCHECK_GT(num_csv_cols, 0); + // Since we know the number of columns, we can instantiate the BlockParsingOperator + parsing_operator_.emplace(io_context_, parse_options_, num_csv_cols, + count_rows_ ? num_rows_seen : -1); RETURN_NOT_OK(MakeConversionSchema()); return bytes_consumed; @@ -691,7 +698,7 @@ class ReaderMixin { if (convert_options_.include_columns.empty()) { // Include all columns in CSV file order - for (int32_t col_index = 0; col_index < num_csv_cols_; ++col_index) { + for (int32_t col_index = 0; col_index < num_csv_cols(); ++col_index) { append_csv_column(column_names_[col_index], col_index); } } else { @@ -719,66 +726,25 @@ class ReaderMixin { return Status::OK(); } - struct ParseResult { - std::shared_ptr parser; - int64_t parsed_bytes; - }; - - Result Parse(const std::shared_ptr& partial, - const std::shared_ptr& completion, - const std::shared_ptr& block, int64_t block_index, - bool is_final) { - static constexpr int32_t max_num_rows = std::numeric_limits::max(); - auto parser = std::make_shared( - io_context_.pool(), parse_options_, num_csv_cols_, num_rows_seen_, max_num_rows); - - std::shared_ptr straddling; - std::vector views; - if (partial->size() != 0 || completion->size() != 0) { - if (partial->size() == 0) { - straddling = completion; - } else if (completion->size() == 0) { - straddling = partial; - } else { - ARROW_ASSIGN_OR_RAISE( - straddling, ConcatenateBuffers({partial, completion}, io_context_.pool())); - } - views = {std::string_view(*straddling), std::string_view(*block)}; - } else { - views = {std::string_view(*block)}; - } - uint32_t parsed_size; - if (is_final) { - RETURN_NOT_OK(parser->ParseFinal(views, &parsed_size)); - } else { - RETURN_NOT_OK(parser->Parse(views, &parsed_size)); - } - // See BlockParsingOperator for explanation. 
- const int64_t bytes_before_buffer = partial->size() + completion->size(); - if (static_cast(parsed_size) < bytes_before_buffer) { - return Status::Invalid( - "CSV parser got out of sync with chunker. This can mean the data file " - "contains cell values spanning multiple lines; please consider enabling " - "the option 'newlines_in_values'."); - } + Result Parse(const CSVBlock& block) { + DCHECK(parsing_operator_.has_value()); + return (*parsing_operator_)(block); + } - if (count_rows_) { - num_rows_seen_ += parser->total_num_rows(); - } - return ParseResult{std::move(parser), static_cast(parsed_size)}; + int num_csv_cols() const { + DCHECK(parsing_operator_.has_value()); + return parsing_operator_->num_csv_cols(); } io::IOContext io_context_; - ReadOptions read_options_; - ParseOptions parse_options_; - ConvertOptions convert_options_; - - // Number of columns in the CSV file - int32_t num_csv_cols_ = -1; - // Whether num_rows_seen_ tracks the number of rows seen in the CSV being parsed - bool count_rows_; - // Number of rows seen in the csv. Not used if count_rows is false - int64_t num_rows_seen_; + const ReadOptions read_options_; + const ParseOptions parse_options_; + const ConvertOptions convert_options_; + // Whether to track the number of rows seen in the CSV being parsed + const bool count_rows_; + + std::optional parsing_operator_; + // Column names in the CSV file std::vector column_names_; ConversionSchema conversion_schema_; @@ -822,14 +788,10 @@ class BaseTableReader : public ReaderMixin, public csv::TableReader { return Status::OK(); } - Result ParseAndInsert(const std::shared_ptr& partial, - const std::shared_ptr& completion, - const std::shared_ptr& block, - int64_t block_index, bool is_final) { - ARROW_ASSIGN_OR_RAISE(auto result, - Parse(partial, completion, block, block_index, is_final)); - RETURN_NOT_OK(ProcessData(result.parser, block_index)); - return result.parsed_bytes; + Status ParseAndInsert(const CSVBlock& block) { + ARROW_ASSIGN_OR_RAISE(auto result, Parse(block)); + RETURN_NOT_OK(ProcessData(result.parser, result.block_index)); + return Status::OK(); } // Trigger conversion of parsed block data @@ -921,8 +883,6 @@ class StreamingReaderImpl : public ReaderMixin, ProcessHeader(first_buffer, &after_header)); bytes_decoded_->fetch_add(header_bytes_consumed); - auto parser_op = - BlockParsingOperator(io_context_, parse_options_, num_csv_cols_, num_rows_seen_); ARROW_ASSIGN_OR_RAISE( auto decoder_op, BlockDecodingOperator::Make(io_context_, convert_options_, conversion_schema_)); @@ -930,8 +890,7 @@ class StreamingReaderImpl : public ReaderMixin, auto block_gen = SerialBlockReader::MakeAsyncIterator( std::move(buffer_generator), MakeChunker(parse_options_), std::move(after_header), read_options_.skip_rows_after_names); - auto parsed_block_gen = - MakeMappedGenerator(std::move(block_gen), std::move(parser_op)); + auto parsed_block_gen = MakeMappedGenerator(std::move(block_gen), *parsing_operator_); auto rb_gen = MakeMappedGenerator(std::move(parsed_block_gen), std::move(decoder_op)); auto self = shared_from_this(); @@ -1035,11 +994,7 @@ class SerialTableReader : public BaseTableReader { // EOF break; } - ARROW_ASSIGN_OR_RAISE( - int64_t parsed_bytes, - ParseAndInsert(maybe_block.partial, maybe_block.completion, maybe_block.buffer, - maybe_block.block_index, maybe_block.is_final)); - RETURN_NOT_OK(maybe_block.consume_bytes(parsed_bytes)); + RETURN_NOT_OK(ParseAndInsert(maybe_block)); } // Finish conversion, create schema and table 
RETURN_NOT_OK(task_group_->Finish()); @@ -1110,13 +1065,8 @@ class AsyncThreadedTableReader DCHECK(!maybe_block.consume_bytes); // Launch parse task - self->task_group_->Append([self, maybe_block] { - return self - ->ParseAndInsert(maybe_block.partial, maybe_block.completion, - maybe_block.buffer, maybe_block.block_index, - maybe_block.is_final) - .status(); - }); + self->task_group_->Append( + [self, maybe_block] { return self->ParseAndInsert(maybe_block); }); return Status::OK(); }; @@ -1239,12 +1189,8 @@ class CSVRowCounter : public ReaderMixin, // IterationEnd. std::function>(const CSVBlock&)> count_cb = [self](const CSVBlock& maybe_block) -> Result> { - ARROW_ASSIGN_OR_RAISE( - auto parser, - self->Parse(maybe_block.partial, maybe_block.completion, maybe_block.buffer, - maybe_block.block_index, maybe_block.is_final)); - RETURN_NOT_OK(maybe_block.consume_bytes(parser.parsed_bytes)); - int32_t total_row_count = parser.parser->total_num_rows(); + ARROW_ASSIGN_OR_RAISE(auto parsed_block, self->Parse(maybe_block)); + int32_t total_row_count = parsed_block.parser->total_num_rows(); self->row_count_ += total_row_count; return total_row_count; }; From 8f1537468a7e5f5a526ddf1e9c634740c923404b Mon Sep 17 00:00:00 2001 From: Lubo Slivka Date: Thu, 8 Feb 2024 22:58:07 +0100 Subject: [PATCH 72/74] GH-40004: [Python][FlightRPC] Release GIL in GeneratorStream (#40005) Fixes #40004. * Closes: #40004 Authored-by: lupko Signed-off-by: David Li --- python/pyarrow/_flight.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index a2ff045f256ac..67ee7590560f0 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -2013,8 +2013,9 @@ cdef CStatus _data_stream_next(void* self, CFlightPayload* payload) except *: max_attempts = 128 for _ in range(max_attempts): if stream.current_stream != nullptr: - check_flight_status( - stream.current_stream.get().Next().Value(payload)) + with nogil: + check_flight_status( + stream.current_stream.get().Next().Value(payload)) # If the stream ended, see if there's another stream from the # generator if payload.ipc_message.metadata != nullptr: From f9a88e5398fe6ec406759e55935d17bb09f9569b Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Thu, 8 Feb 2024 14:26:06 -0800 Subject: [PATCH 73/74] GH-39916: [C#] Restore support for .NET 4.6.2 (#40008) ### What changes are included in this PR? Project targets have been added for net462 which is still in support. A few tests have been modified to allow them to build against that target. ### Are these changes tested? Yes. ### Are there any user-facing changes? There are new build artifacts for Apache.Arrow.dll and Apache.Arrow.Compression.dll. 
* Closes: #39916 Authored-by: Curt Hagenlocher Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression.csproj | 8 +++++++- csharp/src/Apache.Arrow/Apache.Arrow.csproj | 12 +++++++++--- .../Extensions/TupleExtensions.netstandard.cs | 7 +++++++ .../Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 2 +- .../Apache.Arrow.Tests/BinaryArrayBuilderTests.cs | 8 ++++---- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj index fded62911262c..6988567193db4 100644 --- a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj +++ b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj @@ -1,10 +1,16 @@ - netstandard2.0 Provides decompression support for the Arrow IPC format + + netstandard2.0;net462 + + + netstandard2.0 + + diff --git a/csharp/src/Apache.Arrow/Apache.Arrow.csproj b/csharp/src/Apache.Arrow/Apache.Arrow.csproj index 3a229f4ffcaf8..c4bb64b73a9ed 100644 --- a/csharp/src/Apache.Arrow/Apache.Arrow.csproj +++ b/csharp/src/Apache.Arrow/Apache.Arrow.csproj @@ -1,14 +1,20 @@ - netstandard2.0;net6.0 true $(DefineConstants);UNSAFE_BYTEBUFFER;BYTEBUFFER_NO_BOUNDS_CHECK;ENABLE_SPAN_T Apache Arrow is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. - + + netstandard2.0;net6.0;net462 + + + netstandard2.0;net6.0 + + + @@ -34,7 +40,7 @@ - + diff --git a/csharp/src/Apache.Arrow/Extensions/TupleExtensions.netstandard.cs b/csharp/src/Apache.Arrow/Extensions/TupleExtensions.netstandard.cs index fe42075f14f73..e0e0f5707086b 100644 --- a/csharp/src/Apache.Arrow/Extensions/TupleExtensions.netstandard.cs +++ b/csharp/src/Apache.Arrow/Extensions/TupleExtensions.netstandard.cs @@ -25,5 +25,12 @@ public static void Deconstruct(this Tuple value, out T1 item1, o item1 = value.Item1; item2 = value.Item2; } + + public static void Deconstruct(this Tuple value, out T1 item1, out T2 item2, out T3 item3) + { + item1 = value.Item1; + item2 = value.Item2; + item3 = value.Item3; + } } } diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index d8a92ff756751..c422da56b4cef 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -7,7 +7,7 @@ - net7.0;net472 + net7.0;net472;net462 net7.0 diff --git a/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs index 4c2b050d0c8ba..447572dda0eea 100644 --- a/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs +++ b/csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs @@ -83,7 +83,7 @@ public void AppendSingleByte(byte[][] initialContents, byte singleByte) builder.AppendRange(initialContents); int initialLength = builder.Length; int expectedLength = initialLength + 1; - var expectedArrayContents = initialContents.Append(new[] { singleByte }); + var expectedArrayContents = initialContents.Concat(new[] { new[] { singleByte } }); // Act var actualReturnValue = builder.Append(singleByte); @@ -130,7 +130,7 @@ public void AppendNull(byte[][] initialContents) builder.AppendRange(initialContents); int initialLength = builder.Length; int expectedLength = initialLength + 1; - var expectedArrayContents = 
initialContents.Append(null); + var expectedArrayContents = initialContents.Concat(new byte[][] { null }); // Act var actualReturnValue = builder.AppendNull(); @@ -180,7 +180,7 @@ public void AppendReadOnlySpan(byte[][] initialContents, byte[] bytes) int initialLength = builder.Length; var span = (ReadOnlySpan)bytes; int expectedLength = initialLength + 1; - var expectedArrayContents = initialContents.Append(bytes); + var expectedArrayContents = initialContents.Concat(new[] { bytes }); // Act var actualReturnValue = builder.Append(span); @@ -230,7 +230,7 @@ public void AppendEnumerable(byte[][] initialContents, byte[] bytes) int initialLength = builder.Length; int expectedLength = initialLength + 1; var enumerable = (IEnumerable)bytes; - var expectedArrayContents = initialContents.Append(bytes); + var expectedArrayContents = initialContents.Concat(new[] { bytes }); // Act var actualReturnValue = builder.Append(enumerable); From a0dec7f39394e619c8bdfe0eacb6ecde73a9ec12 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Fri, 9 Feb 2024 01:41:36 +0000 Subject: [PATCH 74/74] GH-39352: [FS][Azure] Enable azure in builds (#39971) ### Rationale for this change ### What changes are included in this PR? Enable Azure in linux and mac os wheel builds. Tried to copy GCS Don't enable Azure for windows builds because windows builds where all failing. Failures were a combination of cmake version or `Could not find a package configuration file provided by "wil"`. I think it makes sense to come back to windows builds in another PR. ### Are these changes tested? There is no new functionality to test. ### Are there any user-facing changes? No * Closes: #39352 Authored-by: Thomas Newton Signed-off-by: Sutou Kouhei --- ci/docker/python-wheel-manylinux.dockerfile | 1 + ci/scripts/cpp_build.sh | 1 + ci/scripts/python_build.sh | 1 + ci/scripts/python_test.sh | 1 + ci/scripts/python_wheel_macos_build.sh | 3 +++ ci/scripts/python_wheel_manylinux_build.sh | 3 +++ ci/scripts/python_wheel_unix_test.sh | 6 ++++-- ci/vcpkg/vcpkg.json | 10 ++++++++++ dev/tasks/python-wheels/github.osx.amd64.yml | 1 + dev/tasks/python-wheels/github.osx.arm64.yml | 1 + 10 files changed, 26 insertions(+), 2 deletions(-) diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index 2831440d5a967..b1d9ed5ab88d9 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -82,6 +82,7 @@ RUN vcpkg install \ --clean-after-build \ --x-install-root=${VCPKG_ROOT}/installed \ --x-manifest-root=/arrow/ci/vcpkg \ + --x-feature=azure \ --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 69d86e871ac5f..60cab1a9feaba 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -54,6 +54,7 @@ if [ "${GITHUB_ACTIONS:-false}" = "true" ]; then fi if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then + ARROW_AZURE=OFF ARROW_FLIGHT=OFF ARROW_FLIGHT_SQL=OFF ARROW_GCS=OFF diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index c0a27e6e705e9..9bdcc4d687584 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -55,6 +55,7 @@ export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF} +export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} export PYARROW_WITH_DATASET=${ARROW_DATASET:-ON} export 
PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF} diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 8dfedb2880b50..20ca3300c0538 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -39,6 +39,7 @@ fi # By default, force-test all optional components : ${PYARROW_TEST_ACERO:=${ARROW_ACERO:-ON}} +: ${PYARROW_TEST_AZURE:=${ARROW_AZURE:-ON}} : ${PYARROW_TEST_CUDA:=${ARROW_CUDA:-ON}} : ${PYARROW_TEST_DATASET:=${ARROW_DATASET:-ON}} : ${PYARROW_TEST_FLIGHT:=${ARROW_FLIGHT:-ON}} diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 8123a9fdf1c48..bea5409100770 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -63,6 +63,7 @@ pip install "delocate>=0.10.3" echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" : ${ARROW_ACERO:=ON} +: ${ARROW_AZURE:=ON} : ${ARROW_DATASET:=ON} : ${ARROW_FLIGHT:=ON} : ${ARROW_GANDIVA:=OFF} @@ -95,6 +96,7 @@ pushd ${build_dir}/build cmake \ -DARROW_ACERO=${ARROW_ACERO} \ + -DARROW_AZURE=${ARROW_AZURE} \ -DARROW_BUILD_SHARED=ON \ -DARROW_BUILD_STATIC=OFF \ -DARROW_BUILD_TESTS=OFF \ @@ -148,6 +150,7 @@ export PYARROW_BUNDLE_ARROW_CPP=1 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_ACERO=${ARROW_ACERO} +export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT} export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA} diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index 58e42fea88088..4d4d4fb694e0b 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -49,6 +49,7 @@ rm -rf /arrow/python/pyarrow/*.so.* echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" : ${ARROW_ACERO:=ON} +: ${ARROW_AZURE:=ON} : ${ARROW_DATASET:=ON} : ${ARROW_FLIGHT:=ON} : ${ARROW_GANDIVA:=OFF} @@ -87,6 +88,7 @@ pushd /tmp/arrow-build cmake \ -DARROW_ACERO=${ARROW_ACERO} \ + -DARROW_AZURE=${ARROW_AZURE} \ -DARROW_BUILD_SHARED=ON \ -DARROW_BUILD_STATIC=OFF \ -DARROW_BUILD_TESTS=OFF \ @@ -141,6 +143,7 @@ export PYARROW_BUNDLE_ARROW_CPP=1 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_ACERO=${ARROW_ACERO} +export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT} export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA} diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index 01250ff7ef40c..a25e5c51bddbc 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -28,15 +28,17 @@ fi source_dir=${1} +: ${ARROW_AZURE:=ON} : ${ARROW_FLIGHT:=ON} -: ${ARROW_SUBSTRAIT:=ON} -: ${ARROW_S3:=ON} : ${ARROW_GCS:=ON} +: ${ARROW_S3:=ON} +: ${ARROW_SUBSTRAIT:=ON} : ${CHECK_IMPORTS:=ON} : ${CHECK_UNITTESTS:=ON} : ${INSTALL_PYARROW:=ON} export PYARROW_TEST_ACERO=ON +export PYARROW_TEST_AZURE=${ARROW_AZURE} export PYARROW_TEST_CYTHON=OFF export PYARROW_TEST_DATASET=ON export PYARROW_TEST_FLIGHT=${ARROW_FLIGHT} diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 99771728ecf18..e86479a7c32fc 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -105,6 +105,16 @@ } ] }, + "azure": { + "description": "Azure blob storage support", + "dependencies": [ + "azure-core-cpp", + "azure-identity-cpp", + "azure-storage-blobs-cpp", + "azure-storage-common-cpp", + 
"azure-storage-files-datalake-cpp" + ] + }, "orc": { "description": "ORC support", "dependencies": [ diff --git a/dev/tasks/python-wheels/github.osx.amd64.yml b/dev/tasks/python-wheels/github.osx.amd64.yml index 526412f84214b..e31a681653b37 100644 --- a/dev/tasks/python-wheels/github.osx.amd64.yml +++ b/dev/tasks/python-wheels/github.osx.amd64.yml @@ -85,6 +85,7 @@ jobs: --clean-after-build \ --x-install-root=${VCPKG_ROOT}/installed \ --x-manifest-root=arrow/ci/vcpkg \ + --x-feature=azure \ --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ diff --git a/dev/tasks/python-wheels/github.osx.arm64.yml b/dev/tasks/python-wheels/github.osx.arm64.yml index 35d74f1462453..380c2e42f1d88 100644 --- a/dev/tasks/python-wheels/github.osx.arm64.yml +++ b/dev/tasks/python-wheels/github.osx.arm64.yml @@ -71,6 +71,7 @@ jobs: --clean-after-build \ --x-install-root=${VCPKG_ROOT}/installed \ --x-manifest-root=arrow/ci/vcpkg \ + --x-feature=azure \ --x-feature=flight \ --x-feature=gcs \ --x-feature=json \