From 9f90995c8cee0d9906349f421f2445ab9adcb7ac Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 19 Oct 2023 18:10:05 +0900 Subject: [PATCH] GH-38332: [CI][Release] Resolve symlinks in RAT lint (#38337) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Our release script (`dev/release/02-source.sh`) resolves symlinks in the source archive, but our lint script (`dev/archery/archery/utils/source.py`) doesn't resolve symlinks. So our CI may not detect RAT problems. ### What changes are included in this PR? Resolve symlinks in our lint script too. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #38332 Lead-authored-by: Sutou Kouhei Co-authored-by: Sutou Kouhei Co-authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- cpp/cmake_modules/snappy.diff | 19 +++++++++++++++++++ dev/archery/archery/utils/source.py | 18 +++++++++++++++--- dev/release/02-source.sh | 4 +++- dev/release/rat_exclude_files.txt | 5 ----- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/cpp/cmake_modules/snappy.diff b/cpp/cmake_modules/snappy.diff index f86e2bb19780c..e763636e1dab7 100644 --- a/cpp/cmake_modules/snappy.diff +++ b/cpp/cmake_modules/snappy.diff @@ -1,3 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +# https://github.com/google/snappy/pull/172 + diff --git a/snappy.cc b/snappy.cc index d414718..5b0d0d6 100644 --- a/snappy.cc diff --git a/dev/archery/archery/utils/source.py b/dev/archery/archery/utils/source.py index 37d8cd502a3a0..1915b8f2ef305 100644 --- a/dev/archery/archery/utils/source.py +++ b/dev/archery/archery/utils/source.py @@ -18,7 +18,9 @@ import os from pathlib import Path import subprocess +import tempfile +from .command import Command from .git import git @@ -117,10 +119,20 @@ def archive(self, path, dereference=False, compressor=None, revision=None): raise ValueError("{} is not backed by git".format(self)) rev = revision if revision else "HEAD" - archive = git.archive("--prefix=apache-arrow/", rev, + archive = git.archive("--prefix=apache-arrow.tmp/", rev, git_dir=self.path) - - # TODO(fsaintjacques): fix dereference for + with tempfile.TemporaryDirectory() as tmp: + tmp = Path(tmp) + tar_path = tmp / "apache-arrow.tar" + with open(tar_path, "wb") as tar: + tar.write(archive) + Command("tar").run("xf", tar_path, "-C", tmp) + # Must use the same logic in dev/release/02-source.sh + Command("cp").run("-R", "-L", tmp / + "apache-arrow.tmp", tmp / "apache-arrow") + Command("tar").run("cf", tar_path, "-C", tmp, "apache-arrow") + with open(tar_path, "rb") as tar: + archive = tar.read() if compressor: archive = compressor(archive) diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh index e9cd7126361cd..a3441b23bf539 100755 --- a/dev/release/02-source.sh +++ b/dev/release/02-source.sh @@ -65,7 +65,9 @@ rm -rf ${tag} git archive ${release_hash} --prefix ${tag}/) | \ tar xf - -# Resolve all hard and symbolic links +# Resolve all hard and symbolic links. +# If we change this, we must change ArrowSources.archive in +# dev/archery/archery/utils/source.py too. 
rm -rf ${tag}.tmp mv ${tag} ${tag}.tmp cp -R -L ${tag}.tmp ${tag} diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index af084ea215621..ce637bf839232 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -22,9 +22,6 @@ cpp/build-support/cpplint.py cpp/build-support/lint_exclusions.txt cpp/build-support/iwyu/* cpp/cmake_modules/FindPythonLibsNew.cmake -cpp/cmake_modules/SnappyCMakeLists.txt -cpp/cmake_modules/SnappyConfig.h -cpp/cmake_modules/snappy.diff cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake cpp/src/parquet/.parquetcppversion cpp/src/generated/parquet_constants.cpp @@ -89,8 +86,6 @@ js/yarn.lock js/.eslintignore python/cmake_modules python/cmake_modules/FindPythonLibsNew.cmake -python/cmake_modules/SnappyCMakeLists.txt -python/cmake_modules/SnappyConfig.h python/MANIFEST.in python/manylinux1/.dockerignore python/pyarrow/includes/__init__.pxd