From 2f7984bfeb97f523226e448023be8e8c7512d737 Mon Sep 17 00:00:00 2001 From: "jinli.zjw" Date: Mon, 25 May 2026 14:19:16 +0800 Subject: [PATCH 1/4] feat(build): migrate build support scripts --- build_support/asan-suppressions.txt | 19 + build_support/asan_symbolize.py | 368 ++++++++++++++++++ build_support/get-upstream-commit.sh | 25 ++ build_support/iwyu/iwyu-filter.awk | 96 +++++ build_support/iwyu/iwyu.sh | 89 +++++ build_support/iwyu/iwyu_tool.py | 280 +++++++++++++ build_support/iwyu/mappings/arrow.imp | 20 + build_support/iwyu/mappings/gflags.imp | 20 + build_support/iwyu/mappings/glog.imp | 27 ++ build_support/iwyu/mappings/gmock.imp | 30 ++ build_support/iwyu/mappings/gtest.imp | 36 ++ build_support/iwyu/mappings/paimon-misc.imp | 74 ++++ build_support/lint_exclusions.txt | 13 + build_support/lintutils.py | 119 ++++++ build_support/lsan-suppressions.txt | 19 + build_support/run-test.sh | 244 ++++++++++++ build_support/run_clang_format.py | 137 +++++++ build_support/run_clang_tidy.py | 149 +++++++ .../sanitizer-disallowed-entries.txt | 22 ++ build_support/stacktrace_addr2line.pl | 92 +++++ build_support/tsan-suppressions.txt | 16 + build_support/ubsan-suppressions.txt | 16 + 22 files changed, 1911 insertions(+) create mode 100644 build_support/asan-suppressions.txt create mode 100755 build_support/asan_symbolize.py create mode 100755 build_support/get-upstream-commit.sh create mode 100644 build_support/iwyu/iwyu-filter.awk create mode 100755 build_support/iwyu/iwyu.sh create mode 100755 build_support/iwyu/iwyu_tool.py create mode 100644 build_support/iwyu/mappings/arrow.imp create mode 100644 build_support/iwyu/mappings/gflags.imp create mode 100644 build_support/iwyu/mappings/glog.imp create mode 100644 build_support/iwyu/mappings/gmock.imp create mode 100644 build_support/iwyu/mappings/gtest.imp create mode 100644 build_support/iwyu/mappings/paimon-misc.imp create mode 100644 build_support/lint_exclusions.txt create mode 100644 build_support/lintutils.py 
create mode 100644 build_support/lsan-suppressions.txt create mode 100755 build_support/run-test.sh create mode 100755 build_support/run_clang_format.py create mode 100755 build_support/run_clang_tidy.py create mode 100644 build_support/sanitizer-disallowed-entries.txt create mode 100755 build_support/stacktrace_addr2line.pl create mode 100644 build_support/tsan-suppressions.txt create mode 100644 build_support/ubsan-suppressions.txt diff --git a/build_support/asan-suppressions.txt b/build_support/asan-suppressions.txt new file mode 100644 index 0000000..78866d5 --- /dev/null +++ b/build_support/asan-suppressions.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Note this file is merely a placeholder that contains no suppressions for now. +# But it may become useful in the future. diff --git a/build_support/asan_symbolize.py b/build_support/asan_symbolize.py new file mode 100755 index 0000000..bffb75a --- /dev/null +++ b/build_support/asan_symbolize.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python +#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. 
# See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import os
import re
import subprocess
import sys

llvm_symbolizer = None
# Maps a binary path to the ChainSymbolizer used for it; filled lazily by
# SymbolizationLoop.symbolize_address().
symbolizers = {}
filetypes = {}
vmaddrs = {}
# When True, the spawned command lines and the raw requests are printed.
DEBUG = False


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a 'file:line' location string for display.

    Every command-line argument (sys.argv[1:]) is used as a regular
    expression fragment: the text up to and including a match of that
    argument is removed.  Afterwards a location of the form
    asan_*.cc:<line> collapses to '_asan_rtl_' and 'crtstuff.c:0'
    becomes '???:0'.

    Args:
        file_name: location string as printed by a symbolizer.
    Returns:
        the shortened location string.
    """
    for path_to_cut in sys.argv[1:]:
        # NOTE: the argument is deliberately not escaped, it is a regex.
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub(r'.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub(r'.*crtstuff.c:0', '???:0', file_name)
    return file_name


class Symbolizer(object):
    """Base class of all symbolizers; symbolize() always returns None."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by one long-running llvm-symbolizer process."""

    def __init__(self, symbolizer_path):
        """Remember symbolizer_path and try to start the tool.

        self.pipe is the Popen object, or None when the path does not exist.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer; return the Popen object or None."""
        # NOTE(review): os.path.exists() does not search PATH, so a bare
        # command name only passes this check when it also names a file
        # relative to the current directory -- confirm this is intended.
        if not os.path.exists(self.symbolizer_path):
            return None
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=false',
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        # universal_newlines=True opens the pipes in text mode, so str can
        # be written and read on Python 3 as well as on Python 2.
        return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Sends '<binary> <offset>' to the tool and reads pairs of lines
        (function name, file location) until an empty line.  Frames whose
        function or file starts with '??' are dropped.

        Returns:
            list of '<addr> in <function> <file>' strings, or None when the
            tool is not running, produced no valid frame, or any error
            occurred while talking to it.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # The request must reach the child before we block on readline().
            self.pipe.stdin.flush()
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: report "no result" so the caller can fall back
            # to another symbolizer.
            result = []
        if not result:
            result = None
        return result


def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer.

    The tool path is taken from the LLVM_SYMBOLIZER_PATH environment
    variable and defaults to 'llvm-symbolizer'.  The system argument is
    accepted but not used.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        # Assume llvm-symbolizer is in PATH.
        symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by one 'addr2line -f -e <binary>' process."""

    def __init__(self, binary):
        """Start addr2line for binary (Popen raises OSError if it is missing)."""
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Spawn addr2line for self.binary and return the Popen object."""
        cmd = ['addr2line', '-f', '-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Text-mode pipes: see LLVMSymbolizer.open_llvm_symbolizer().
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            None when binary is not the one this instance was created for,
            otherwise a one-element list '<addr> in <function> <file>'.
            When talking to addr2line fails, function and file are empty.
        """
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write(offset + '\n')
            # Make sure addr2line sees the request before we wait for it.
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Best effort: still emit a frame, just without names.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]


class DarwinSymbolizer(Symbolizer):
    """Symbolizer that runs one 'atos' process per symbolized address."""

    def __init__(self, addr, binary):
        """Remember binary and guess the architecture from addr's length."""
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        if len(addr) > 10:
            self.arch = 'x86_64'
        else:
            self.arch = 'i386'
        self.vmaddr = None
        self.pipe = None

    def write_addr_to_pipe(self, offset):
        """Send offset (a hex string) to atos as a normalized '0x...' line."""
        self.pipe.stdin.write('0x%x\n' % int(offset, 16))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Spawn atos for self.binary/self.arch and store it in self.pipe."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        # Text-mode pipes so that str works on Python 3 as well.
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            None when binary is not the one this instance was created for,
            otherwise a one-element list: '<addr> in <function> <file>' for
            a well-formed atos answer, '<addr> in <raw atos line>' if not.
        """
        if self.binary != binary:
            return None
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # communicate() closes stdin, drains stdout/stderr and waits for the
        # process, so no atos process or pipe is left behind per frame.
        output, _ = self.pipe.communicate()
        atos_line = output.split('\n', 1)[0].rstrip()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line: {0}'.format(atos_line))
        if match:
            function_name = match.group(1)
            # Strip the parameter list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
+class ChainSymbolizer(Symbolizer): + def __init__(self, symbolizer_list): + super(ChainSymbolizer, self).__init__() + self.symbolizer_list = symbolizer_list + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + for symbolizer in self.symbolizer_list: + if symbolizer: + result = symbolizer.symbolize(addr, binary, offset) + if result: + return result + return None + + def append_symbolizer(self, symbolizer): + self.symbolizer_list.append(symbolizer) + + +def BreakpadSymbolizerFactory(binary): + suffix = os.getenv('BREAKPAD_SUFFIX') + if suffix: + filename = binary + suffix + if os.access(filename, os.F_OK): + return BreakpadSymbolizer(filename) + return None + + +def SystemSymbolizerFactory(system, addr, binary): + if system == 'Darwin': + return DarwinSymbolizer(addr, binary) + elif system == 'Linux': + return Addr2LineSymbolizer(binary) + + +class BreakpadSymbolizer(Symbolizer): + def __init__(self, filename): + super(BreakpadSymbolizer, self).__init__() + self.filename = filename + lines = file(filename).readlines() + self.files = [] + self.symbols = {} + self.address_list = [] + self.addresses = {} + # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t + fragments = lines[0].rstrip().split() + self.arch = fragments[2] + self.debug_id = fragments[3] + self.binary = ' '.join(fragments[4:]) + self.parse_lines(lines[1:]) + + def parse_lines(self, lines): + cur_function_addr = '' + for line in lines: + fragments = line.split() + if fragments[0] == 'FILE': + assert int(fragments[1]) == len(self.files) + self.files.append(' '.join(fragments[2:])) + elif fragments[0] == 'PUBLIC': + self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:]) + elif fragments[0] in ['CFI', 'STACK']: + pass + elif fragments[0] == 'FUNC': + cur_function_addr = int(fragments[1], 16) + if not cur_function_addr in self.symbols.keys(): + self.symbols[cur_function_addr] = ' '.join(fragments[4:]) + else: + # Line starting with an address. 
+ addr = int(fragments[0], 16) + self.address_list.append(addr) + # Tuple of symbol address, size, line, file number. + self.addresses[addr] = (cur_function_addr, + int(fragments[1], 16), + int(fragments[2]), + int(fragments[3])) + self.address_list.sort() + + def get_sym_file_line(self, addr): + key = None + if addr in self.addresses.keys(): + key = addr + else: + index = bisect.bisect_left(self.address_list, addr) + if index == 0: + return None + else: + key = self.address_list[index - 1] + sym_id, size, line_no, file_no = self.addresses[key] + symbol = self.symbols[sym_id] + filename = self.files[file_no] + if addr < key + size: + return symbol, filename, line_no + else: + return None + + def symbolize(self, addr, binary, offset): + if self.binary != binary: + return None + res = self.get_sym_file_line(int(offset, 16)) + if res: + function_name, file_name, line_no = res + result = ['%s in %s %s:%d' % ( + addr, function_name, file_name, line_no)] + print(result) + return result + else: + return None + + +class SymbolizationLoop(object): + def __init__(self, binary_name_filter=None): + # Used by clients who may want to supply a different binary name. + # E.g. in Chrome several binaries may share a single .dSYM. + self.binary_name_filter = binary_name_filter + self.system = os.uname()[0] + if self.system in ['Linux', 'Darwin']: + self.llvm_symbolizer = LLVMSymbolizerFactory(self.system) + else: + raise Exception('Unknown system') + + def symbolize_address(self, addr, binary, offset): + # Use the chain of symbolizers: + # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos + # (fall back to next symbolizer if the previous one fails). + if not binary in symbolizers: + symbolizers[binary] = ChainSymbolizer( + [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer]) + result = symbolizers[binary].symbolize(addr, binary, offset) + if result is None: + # Initialize system symbolizer only if other symbolizers failed. 
+ symbolizers[binary].append_symbolizer( + SystemSymbolizerFactory(self.system, addr, binary)) + result = symbolizers[binary].symbolize(addr, binary, offset) + # The system symbolizer must produce some result. + assert result + return result + + def print_symbolized_lines(self, symbolized_lines): + if not symbolized_lines: + print(self.current_line) + else: + for symbolized_frame in symbolized_lines: + print(' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip()) + self.frame_no += 1 + + def process_stdin(self): + self.frame_no = 0 + + if sys.version_info[0] == 2: + sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) + else: + # Unbuffered output is not supported in Python 3 + sys.stdout = os.fdopen(sys.stdout.fileno(), 'w') + + while True: + line = sys.stdin.readline() + if not line: break + self.current_line = line.rstrip() + #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) + stack_trace_line_format = ( + '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') + match = re.match(stack_trace_line_format, line) + if not match: + print(self.current_line) + continue + if DEBUG: + print(line) + _, frameno_str, addr, binary, offset = match.groups() + if frameno_str == '0': + # Assume that frame #0 is the first frame of new stack trace. 
+ self.frame_no = 0 + original_binary = binary + if self.binary_name_filter: + binary = self.binary_name_filter(binary) + symbolized_line = self.symbolize_address(addr, binary, offset) + if not symbolized_line: + if original_binary != binary: + symbolized_line = self.symbolize_address(addr, binary, offset) + self.print_symbolized_lines(symbolized_line) + + +if __name__ == '__main__': + loop = SymbolizationLoop() + loop.process_stdin() diff --git a/build_support/get-upstream-commit.sh b/build_support/get-upstream-commit.sh new file mode 100755 index 0000000..f9db043 --- /dev/null +++ b/build_support/get-upstream-commit.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# Script which tries to determine the most recent git hash in the current +# branch which originates from master by checking for the +# 'PAIMON-1234: Description` commit message +set -e + +git log --grep='^PAIMON-[0-9]*:.*' -n1 --pretty=format:%H diff --git a/build_support/iwyu/iwyu-filter.awk b/build_support/iwyu/iwyu-filter.awk new file mode 100644 index 0000000..394bf08 --- /dev/null +++ b/build_support/iwyu/iwyu-filter.awk @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# This is an awk script to process output from the include-what-you-use (IWYU) +# tool. As of now, IWYU is of alpha quality and it gives many incorrect +# recommendations -- obviously invalid or leading to compilation breakage. +# Most of those can be silenced using appropriate IWYU pragmas, but it's not +# the case for the auto-generated files. +# +# Also, it's possible to address invalid recommendation using mappings: +# https://github.com/include-what-you-use/include-what-you-use/blob/master/docs/IWYUMappings.md +# +# Usage: +# 1. Run the CMake with -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE= +# +# The path to the IWYU binary should be absolute. 
The path to the binary +# and the command-line options should be separated by semicolon +# (that's for feeding it into CMake list variables). +# +# E.g., from the build directory (line breaks are just for readability): +# +# CC=../../thirdparty/clang-toolchain/bin/clang +# CXX=../../thirdparty/clang-toolchain/bin/clang++ +# IWYU="`pwd`../../thirdparty/clang-toolchain/bin/include-what-you-use;\ +# -Xiwyu;--mapping_file=`pwd`../../build_support/iwyu/mappings/map.imp" +# +# ../../build_support/enable_devtoolset.sh \ +# env CC=$CC CXX=$CXX \ +# ../../thirdparty/installed/common/bin/cmake \ +# -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE=\"$IWYU\" \ +# ../.. +# +# NOTE: +# Since the arrow code has some 'ifdef NDEBUG' directives, it's possible +# that IWYU would produce different results if run against release, not +# debug build. However, we plan to use the tool only with debug builds. +# +# 2. Run make, separating the output from the IWYU tool into a separate file +# (it's possible to use piping the output from the tool to the script +# but having a file is good for future reference, if necessary): +# +# make -j$(nproc) 2>/tmp/iwyu.log +# +# 3. Process the output from the IWYU tool using the script: +# +# awk -f ../../build_support/iwyu/iwyu-filter.awk /tmp/iwyu.log +# + +BEGIN { + # This is the list of the files for which the suggestions from IWYU are + # ignored. Eventually, this list should become empty as soon as all the valid + # suggestions are addressed and invalid ones are taken care either by proper + # IWYU pragmas or adding special mappings (e.g. like boost mappings). 
+ # muted["relative/path/to/file"] + muted["arrow/util/bit-util-test.cc"] + muted["arrow/util/rle-encoding-test.cc"] + muted["arrow/vendored"] + muted["include/hdfs.h"] + muted["arrow/visitor.h"] +} + +# mute all suggestions for the auto-generated files +/.*\.(pb|proxy|service)\.(cc|h) should (add|remove) these lines:/, /^$/ { + next +} + +# mute suggestions for the explicitly specified files +/.* should (add|remove) these lines:/ { + do_print = 1 + for (path in muted) { + if (index($0, path)) { + do_print = 0 + break + } + } +} +/^$/ { + if (do_print) print + do_print = 0 +} +{ if (do_print) print } diff --git a/build_support/iwyu/iwyu.sh b/build_support/iwyu/iwyu.sh new file mode 100755 index 0000000..09b4803 --- /dev/null +++ b/build_support/iwyu/iwyu.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +set -uo pipefail + +ROOT=$(cd $(dirname $BASH_SOURCE)/../..; pwd) + +IWYU_LOG=$(mktemp -t paimon-cpp-iwyu.XXXXXX) +trap "rm -f $IWYU_LOG" EXIT + +IWYU_MAPPINGS_PATH="$ROOT/build_support/iwyu/mappings" +IWYU_ARGS="\ + --mapping_file=$IWYU_MAPPINGS_PATH/paimon-misc.imp \ + --mapping_file=$IWYU_MAPPINGS_PATH/arrow.imp \ + --mapping_file=$IWYU_MAPPINGS_PATH/gflags.imp \ + --mapping_file=$IWYU_MAPPINGS_PATH/glog.imp \ + --mapping_file=$IWYU_MAPPINGS_PATH/gmock.imp \ + --mapping_file=$IWYU_MAPPINGS_PATH/gtest.imp" + +set -e + +affected_files() { + pushd $ROOT > /dev/null + local commit=$($ROOT/build_support/get-upstream-commit.sh) + git diff --name-only $commit | awk '/\.(c|cc|cpp|h)$/' + popd > /dev/null +} + +# Show the IWYU version. Also causes the script to fail if iwyu is not in your +# PATH +include-what-you-use --version + +if [[ "${1:-}" == "all" ]]; then + python $ROOT/build_support/iwyu/iwyu_tool.py -p ${IWYU_COMPILATION_DATABASE_PATH:-.} \ + -- $IWYU_ARGS + #| awk -f $ROOT/build_support/iwyu/iwyu-filter.awk +elif [[ "${1:-}" == "match" ]]; then + ALL_FILES= + IWYU_FILE_LIST= + for path in $(find $ROOT/src -type f | awk '/\.(c|cc|cpp|h)$/'); do + if [[ $path =~ $2 ]]; then + IWYU_FILE_LIST="$IWYU_FILE_LIST $path" + fi + done + + echo "Running IWYU on $IWYU_FILE_LIST" + python $ROOT/build_support/iwyu/iwyu_tool.py \ + -p ${IWYU_COMPILATION_DATABASE_PATH:-.} $IWYU_FILE_LIST -- \ + $IWYU_ARGS | awk -f $ROOT/build_support/iwyu/iwyu-filter.awk +else + # Build the list of updated files which are of IWYU interest. + file_list_tmp=$(affected_files) + if [ -z "$file_list_tmp" ]; then + exit 0 + fi + + # Adjust the path for every element in the list. The iwyu_tool.py normalizes + # paths (via realpath) to match the records from the compilation database. 
+ IWYU_FILE_LIST= + for p in $file_list_tmp; do + IWYU_FILE_LIST="$IWYU_FILE_LIST $ROOT/$p" + done + + python $ROOT/build_support/iwyu/iwyu_tool.py \ + -p ${IWYU_COMPILATION_DATABASE_PATH:-.} $IWYU_FILE_LIST -- \ + $IWYU_ARGS | awk -f $ROOT/build_support/iwyu/iwyu-filter.awk > $IWYU_LOG +fi + +if [ -s "$IWYU_LOG" ]; then + # The output is not empty: the changelist needs correction. + cat $IWYU_LOG 1>&2 + exit 1 +fi diff --git a/build_support/iwyu/iwyu_tool.py b/build_support/iwyu/iwyu_tool.py new file mode 100755 index 0000000..1429e0c --- /dev/null +++ b/build_support/iwyu/iwyu_tool.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python + +# This file has been imported into the apache source tree from +# the IWYU source tree as of version 0.8 +# https://github.com/include-what-you-use/include-what-you-use/blob/master/iwyu_tool.py +# and corresponding license has been added: +# https://github.com/include-what-you-use/include-what-you-use/blob/master/LICENSE.TXT +# +# ============================================================================== +# LLVM Release License +# ============================================================================== +# University of Illinois/NCSA +# Open Source License +# +# Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. +# All rights reserved. 
+# +# Developed by: +# +# LLVM Team +# +# University of Illinois at Urbana-Champaign +# +# http://llvm.org +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal with +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do +# so, subject to the following conditions: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimers. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimers in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the names of the LLVM Team, University of Illinois at +# Urbana-Champaign, nor the names of its contributors may be used to +# endorse or promote products derived from this Software without specific +# prior written permission. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +# SOFTWARE. + +""" Driver to consume a Clang compilation database and invoke IWYU. + +Example usage with CMake: + + # Unix systems + $ mkdir build && cd build + $ CC="clang" CXX="clang++" cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ... + $ iwyu_tool.py -p . 
+ + # Windows systems + $ mkdir build && cd build + $ cmake -DCMAKE_CXX_COMPILER="%VCINSTALLDIR%/bin/cl.exe" \ + -DCMAKE_C_COMPILER="%VCINSTALLDIR%/VC/bin/cl.exe" \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -G Ninja ... + $ python iwyu_tool.py -p . + +See iwyu_tool.py -h for more details on command-line arguments. +""" + +import os +import sys +import json +import argparse +import subprocess +import re + +import logging + +logging.basicConfig(filename='iwyu.log') +LOGGER = logging.getLogger("iwyu") + + +def iwyu_formatter(output): + """ Process iwyu's output, basically a no-op. """ + print('\n'.join(output)) + + +CORRECT_RE = re.compile(r'^\((.*?) has correct #includes/fwd-decls\)$') +SHOULD_ADD_RE = re.compile(r'^(.*?) should add these lines:$') +SHOULD_REMOVE_RE = re.compile(r'^(.*?) should remove these lines:$') +FULL_LIST_RE = re.compile(r'The full include-list for (.*?):$') +END_RE = re.compile(r'^---$') +LINES_RE = re.compile(r'^- (.*?) // lines ([0-9]+)-[0-9]+$') + + +GENERAL, ADD, REMOVE, LIST = range(4) + + +def clang_formatter(output): + """ Process iwyu's output into something clang-like. 
""" + state = (GENERAL, None) + for line in output: + match = CORRECT_RE.match(line) + if match: + print('%s:1:1: note: #includes/fwd-decls are correct', match.groups(1)) + continue + match = SHOULD_ADD_RE.match(line) + if match: + state = (ADD, match.group(1)) + continue + match = SHOULD_REMOVE_RE.match(line) + if match: + state = (REMOVE, match.group(1)) + continue + match = FULL_LIST_RE.match(line) + if match: + state = (LIST, match.group(1)) + elif END_RE.match(line): + state = (GENERAL, None) + elif not line.strip(): + continue + elif state[0] == GENERAL: + print(line) + elif state[0] == ADD: + print('%s:1:1: error: add the following line', state[1]) + print(line) + elif state[0] == REMOVE: + match = LINES_RE.match(line) + line_no = match.group(2) if match else '1' + print('%s:%s:1: error: remove the following line', state[1], line_no) + print(match.group(1)) + + +DEFAULT_FORMAT = 'iwyu' +FORMATTERS = { + 'iwyu': iwyu_formatter, + 'clang': clang_formatter +} + + +def get_output(cwd, command): + """ Run the given command and return its output as a string. """ + process = subprocess.Popen(command, + cwd=cwd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + return process.communicate()[0].decode("utf-8").splitlines() + + +def run_iwyu(cwd, compile_command, iwyu_args, verbose, formatter): + """ Rewrite compile_command to an IWYU command, and run it. """ + compiler, _, args = compile_command.partition(' ') + if compiler.endswith('cl.exe'): + # If the compiler name is cl.exe, let IWYU be cl-compatible + clang_args = ['--driver-mode=cl'] + else: + clang_args = [] + + iwyu_args = ['-Xiwyu ' + a for a in iwyu_args] + command = ['include-what-you-use'] + clang_args + iwyu_args + command = '%s %s' % (' '.join(command), args.strip()) + + if verbose: + print('%s:', command) + + formatter(get_output(cwd, command)) + + +def main(compilation_db_path, source_files, verbose, formatter, iwyu_args): + """ Entry point. 
""" + # Canonicalize compilation database path + if os.path.isdir(compilation_db_path): + compilation_db_path = os.path.join(compilation_db_path, + 'compile_commands.json') + + compilation_db_path = os.path.realpath(compilation_db_path) + if not os.path.isfile(compilation_db_path): + print('ERROR: No such file or directory: \'%s\'', compilation_db_path) + return 1 + + # Read compilation db from disk + with open(compilation_db_path, 'r') as fileobj: + compilation_db = json.load(fileobj) + + # expand symlinks + for entry in compilation_db: + entry['file'] = os.path.realpath(entry['file']) + + # Cross-reference source files with compilation database + source_files = [os.path.realpath(s) for s in source_files] + if not source_files: + # No source files specified, analyze entire compilation database + entries = compilation_db + else: + # Source files specified, analyze the ones appearing in compilation db, + # warn for the rest. + entries = [] + for source in source_files: + matches = [e for e in compilation_db if e['file'] == source] + if matches: + entries.extend(matches) + else: + print("{} not in compilation database".format(source)) + # TODO: As long as there is no complete compilation database available this check cannot be performed + pass + #print('WARNING: \'%s\' not found in compilation database.', source) + + # Run analysis + try: + for entry in entries: + cwd, compile_command = entry['directory'], entry['command'] + run_iwyu(cwd, compile_command, iwyu_args, verbose, formatter) + except OSError as why: + print('ERROR: Failed to launch include-what-you-use: %s', why) + return 1 + + return 0 + + +def _bootstrap(): + """ Parse arguments and dispatch to main(). """ + # This hackery is necessary to add the forwarded IWYU args to the + # usage and help strings. + def customize_usage(parser): + """ Rewrite the parser's format_usage. 
""" + original_format_usage = parser.format_usage + parser.format_usage = lambda: original_format_usage().rstrip() + \ + ' -- []' + os.linesep + + def customize_help(parser): + """ Rewrite the parser's format_help. """ + original_format_help = parser.format_help + + def custom_help(): + """ Customized help string, calls the adjusted format_usage. """ + helpmsg = original_format_help() + helplines = helpmsg.splitlines() + helplines[0] = parser.format_usage().rstrip() + return os.linesep.join(helplines) + os.linesep + + parser.format_help = custom_help + + # Parse arguments + parser = argparse.ArgumentParser( + description='Include-what-you-use compilation database driver.', + epilog='Assumes include-what-you-use is available on the PATH.') + customize_usage(parser) + customize_help(parser) + + parser.add_argument('-v', '--verbose', action='store_true', + help='Print IWYU commands') + parser.add_argument('-o', '--output-format', type=str, + choices=FORMATTERS.keys(), default=DEFAULT_FORMAT, + help='Output format (default: %s)' % DEFAULT_FORMAT) + parser.add_argument('-p', metavar='', required=True, + help='Compilation database path', dest='dbpath') + parser.add_argument('source', nargs='*', + help='Zero or more source files to run IWYU on. ' + 'Defaults to all in compilation database.') + + def partition_args(argv): + """ Split around '--' into driver args and IWYU args. 
""" + try: + double_dash = argv.index('--') + return argv[:double_dash], argv[double_dash+1:] + except ValueError: + return argv, [] + argv, iwyu_args = partition_args(sys.argv[1:]) + args = parser.parse_args(argv) + + sys.exit(main(args.dbpath, args.source, args.verbose, + FORMATTERS[args.output_format], iwyu_args)) + + +if __name__ == '__main__': + _bootstrap() diff --git a/build_support/iwyu/mappings/arrow.imp b/build_support/iwyu/mappings/arrow.imp new file mode 100644 index 0000000..3fbf336 --- /dev/null +++ b/build_support/iwyu/mappings/arrow.imp @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +[ + {include: ["", public, '"arrow/api.h"', public]}, + {include: ["", public, '"arrow/c/abi.h"', public]}, +] diff --git a/build_support/iwyu/mappings/gflags.imp b/build_support/iwyu/mappings/gflags.imp new file mode 100644 index 0000000..46ce63d --- /dev/null +++ b/build_support/iwyu/mappings/gflags.imp @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +[ + # confuses the IWYU tool because of the 'using ' + { symbol: [ "fLS::clstring", private, "", public ] } +] diff --git a/build_support/iwyu/mappings/glog.imp b/build_support/iwyu/mappings/glog.imp new file mode 100644 index 0000000..08c5e35 --- /dev/null +++ b/build_support/iwyu/mappings/glog.imp @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+[ + { symbol: [ "LOG", private, "", public ] }, + { symbol: [ "VLOG", private, "", public ] }, + { symbol: [ "CHECK_EQ", private, "", public ] }, + { symbol: [ "CHECK_NE", private, "", public ] }, + { symbol: [ "CHECK_LT", private, "", public ] }, + { symbol: [ "CHECK_GE", private, "", public ] }, + { symbol: [ "CHECK_GT", private, "", public ] }, + { symbol: [ "ErrnoLogMessage", private, "", public ] }, + { symbol: [ "COMPACT_GOOGLE_LOG_0", private, "", public ] } +] diff --git a/build_support/iwyu/mappings/gmock.imp b/build_support/iwyu/mappings/gmock.imp new file mode 100644 index 0000000..5336799 --- /dev/null +++ b/build_support/iwyu/mappings/gmock.imp @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+[ + { + include: [ + "", + private, + '"gmock/gmock.h"', + public, + ] + }, + {include: ["", private, '"gmock/gmock.h"', public]}, + {include: ["", private, '"gmock/gmock.h"', public]}, + {include: ["", private, '"gmock/gmock.h"', public]}, + {include: ["", private, '"gmock/gmock.h"', public]}, +] diff --git a/build_support/iwyu/mappings/gtest.imp b/build_support/iwyu/mappings/gtest.imp new file mode 100644 index 0000000..7899ae9 --- /dev/null +++ b/build_support/iwyu/mappings/gtest.imp @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+[ + { + include: [ + "", + private, + '"gtest/gtest.h"', + public, + ] + }, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", private, '"gtest/gtest.h"', public]}, + {include: ["", public, '"gtest/gtest.h"', public]}, +] diff --git a/build_support/iwyu/mappings/paimon-misc.imp b/build_support/iwyu/mappings/paimon-misc.imp new file mode 100644 index 0000000..67f3124 --- /dev/null +++ b/build_support/iwyu/mappings/paimon-misc.imp @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+[ + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", private, "", public]}, + {include: ["", public, "", public]}, + {include: ["", public, "", public]}, + {include: ["", public, "", public]}, + {symbol: ["bool", private, "", public]}, + {symbol: ["false", private, "", public]}, + {symbol: ["true", private, "", public]}, + {symbol: ["int8_t", private, "", public]}, + {symbol: ["int16_t", private, "", public]}, + {symbol: ["int32_t", private, "", public]}, + {symbol: ["int64_t", private, "", public]}, + {symbol: ["uint8_t", private, "", public]}, + {symbol: ["uint16_t", private, "", public]}, + {symbol: ["uint32_t", private, "", public]}, + {symbol: ["uint64_t", private, "", public]}, + {symbol: ["size_t", private, "", public]}, + {symbol: ["make_shared", private, "", public]}, + {symbol: ["shared_ptr", private, "", public]}, + {symbol: ["std::copy", private, "", public]}, + {symbol: ["std::move", private, "", public]}, + {symbol: ["std::transform", private, "", public]}, + {symbol: ["pair", private, "", public]}, + {symbol: ["errno", private, "", public]}, + {symbol: ["posix_memalign", private, "", public]}, + { + include: [ + "", + public, + '"tbb/concurrent_hash_map.h"', + public, + ] + }, + { + include: [ + "", + public, + '"tbb/detail/_hash_compare.h"', + public, + ] + }, +] diff --git a/build_support/lint_exclusions.txt b/build_support/lint_exclusions.txt new file 
mode 100644 index 0000000..ac3933d --- /dev/null +++ b/build_support/lint_exclusions.txt @@ -0,0 +1,13 @@ +*_generated* +*parquet_constants.* +*parquet_types.* +*windows_compatibility.h +*pyarrow_api.h +*pyarrow_lib.h +*python/config.h +*python/platform.h +*third_party/roaring_bitmap/* +*vendored/* +*RcppExports.cpp* +*arrowExports.cpp* +*src/paimon/testing/utils/test_helper.h diff --git a/build_support/lintutils.py b/build_support/lintutils.py new file mode 100644 index 0000000..3b2e685 --- /dev/null +++ b/build_support/lintutils.py @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import multiprocessing as mp
import os
import re
from fnmatch import fnmatch
from subprocess import Popen

# Matches ANSI escape sequences such as the colour codes a tool emits when
# asked for coloured output.  Compiled once at import time instead of on
# every call to _remove_color().
_ANSI_ESCAPE_RE = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')


def chunk(seq, n):
    """
    Divide an iterable into chunks of at most ``n`` elements.

    Every chunk holds exactly ``n`` elements except possibly the last one,
    which may be smaller but is never empty.  An empty ``seq`` produces an
    empty list.

    Raises ValueError when ``n`` is smaller than 1, because no chunking
    satisfies the "never empty" guarantee in that case.
    """
    if n < 1:
        raise ValueError(
            "chunk size must be at least 1, got {!r}".format(n))
    chunks = []
    some = []
    for element in seq:
        if len(some) == n:
            chunks.append(some)
            some = []
        some.append(element)
    if some:
        chunks.append(some)
    return chunks


def dechunk(chunks):
    """Flatten an iterable of chunks into a single list."""
    return [element for some in chunks for element in some]


def run_parallel(cmds, **kwargs):
    """
    Run each of cmds (with shared **kwargs) using subprocess.Popen
    then wait for all of them to complete.

    Commands are started in batches of multiprocessing.cpu_count() * 2;
    a batch is fully drained before the next one is started.

    Returns a list of (returncode, stdout, stderr) tuples, one per command
    and in the same order as ``cmds``.  stdout/stderr are whatever
    Popen.communicate() returns for the given kwargs.
    """
    complete = []
    for cmds_batch in chunk(cmds, mp.cpu_count() * 2):
        procs_batch = [Popen(cmd, **kwargs) for cmd in cmds_batch]
        for proc in procs_batch:
            stdout, stderr = proc.communicate()
            complete.append((proc.returncode, stdout, stderr))
    return complete


# File extensions that are considered lintable sources.
_source_extensions = ('.h', '.cc', '.cpp', '.c')


def need_do_lint(path, exclude_globs=()):
    """
    Return True when ``path`` should be linted.

    A path qualifies when its extension is one of _source_extensions and it
    matches none of the fnmatch-style patterns in ``exclude_globs``.
    """
    # filter out non-source files
    if os.path.splitext(path)[1] not in _source_extensions:
        return False
    # filter out files that match the globs in the globs file
    return not any(fnmatch(path, glob) for glob in exclude_globs)


def get_sources(source_dir, exclude_globs=()):
    """
    Walk ``source_dir`` recursively and return the absolute paths of all
    files for which need_do_lint() is true, in os.walk() order.
    """
    sources = []
    for directory, _subdirs, basenames in os.walk(source_dir):
        for basename in basenames:
            path = os.path.abspath(os.path.join(directory, basename))
            if need_do_lint(path, exclude_globs):
                sources.append(path)
    return sources


def _remove_color(colored_text):
    """
    Return ``colored_text`` as str with ANSI escape sequences removed.

    Accepts bytes (decoded as UTF-8, with undecodable bytes replaced so
    that stray bytes in tool output cannot abort the run) or str.
    """
    if isinstance(colored_text, bytes):
        colored_text = colored_text.decode('utf-8', 'replace')
    return _ANSI_ESCAPE_RE.sub('', colored_text)


def stdout_pathcolonline(completed_process, filenames):
    """
    given a completed process which may have reported some files as problematic
    by printing the path name followed by ':' then a line number, examine
    stdout and return the set of actually reported file names

    ``completed_process`` is a (returncode, stdout, stderr) tuple as
    produced by run_parallel(); only stdout (bytes) is inspected.

    Returns (problem_files, text): the subset of ``filenames`` that start
    at least one output line as ``<filename>:``, and the decoded stdout
    with any colour codes left intact.
    """
    _returncode, stdout, _stderr = completed_process
    # Decode once; the same text is both scanned and returned.
    text = stdout.decode('utf-8', 'replace')
    # Map the "<filename>:" prefix to look for back to the plain file name,
    # so that the reported names do not carry the trailing colon.
    pending = {filename + ":": filename for filename in filenames}
    problem_files = set()
    for line in _remove_color(text).splitlines():
        prefix = next((p for p in pending if line.startswith(p)), None)
        if prefix is not None:
            # Each file is reported once; stop looking for it afterwards.
            problem_files.add(pending.pop(prefix))
    return problem_files, text
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Script which wraps running a test and redirects its output to a +# test log directory. +# +# Arguments: +# $1 - Base path for logs/artifacts. +# $2 - type of test (e.g. test or benchmark) +# $3 - path to executable +# $ARGN - arguments for executable +# + +OUTPUT_ROOT=$1 +shift +ROOT=$(cd $(dirname $BASH_SOURCE)/..; pwd) + +TEST_LOGDIR=$OUTPUT_ROOT/build/$1-logs +mkdir -p $TEST_LOGDIR + +RUN_TYPE=$1 +shift +TEST_DEBUGDIR=$OUTPUT_ROOT/build/$RUN_TYPE-debug +mkdir -p $TEST_DEBUGDIR + +TEST_DIRNAME=$(cd $(dirname $1); pwd) +TEST_FILENAME=$(basename $1) +shift +TEST_EXECUTABLE="$TEST_DIRNAME/$TEST_FILENAME" +TEST_NAME=$(echo $TEST_FILENAME | perl -pe 's/\..+?$//') # Remove path and extension (if any). + +# We run each test in its own subdir to avoid core file related races. +TEST_WORKDIR=$OUTPUT_ROOT/build/test-work/$TEST_NAME +mkdir -p $TEST_WORKDIR +pushd $TEST_WORKDIR >/dev/null || exit 1 +# copy test data before run test +TEST_DATA_SRC_DIR=$OUTPUT_ROOT/../test/test_data +TEST_DATA_DST_DIR=$TEST_WORKDIR/test +mkdir -p $TEST_DATA_DST_DIR +cp -r $TEST_DATA_SRC_DIR $TEST_DATA_DST_DIR +rm -f * + +set -o pipefail + +LOGFILE=$TEST_LOGDIR/$TEST_NAME.txt +XMLFILE=$TEST_LOGDIR/$TEST_NAME.xml + +TEST_EXECUTION_ATTEMPTS=1 + +# Remove both the uncompressed output, so the developer doesn't accidentally get confused +# and read output from a prior test run. 
+rm -f $LOGFILE $LOGFILE.gz + +pipe_cmd=cat + +function setup_sanitizers() { + # Sets environment variables for different sanitizers (it configures how) the run_tests. Function works. + + # Configure TSAN (ignored if this isn't a TSAN build). + # + # Deadlock detection (new in clang 3.5) is disabled because: + # 1. The clang 3.5 deadlock detector crashes in some unit tests. It + # needs compiler-rt commits c4c3dfd, 9a8efe3, and possibly others. + # 2. Many unit tests report lock-order-inversion warnings; they should be + # fixed before reenabling the detector. + TSAN_OPTIONS="$TSAN_OPTIONS detect_deadlocks=0" + TSAN_OPTIONS="$TSAN_OPTIONS suppressions=$ROOT/build_support/tsan-suppressions.txt" + TSAN_OPTIONS="$TSAN_OPTIONS history_size=7" + export TSAN_OPTIONS + + UBSAN_OPTIONS="$UBSAN_OPTIONS print_stacktrace=1" + UBSAN_OPTIONS="$UBSAN_OPTIONS suppressions=$ROOT/build_support/ubsan-suppressions.txt" + export UBSAN_OPTIONS + + # Enable leak detection even under LLVM 3.4, where it was disabled by default. + # This flag only takes effect when running an ASAN build. + # ASAN_OPTIONS="$ASAN_OPTIONS detect_leaks=1" + # export ASAN_OPTIONS + + # Set up suppressions for LeakSanitizer + LSAN_OPTIONS="$LSAN_OPTIONS suppressions=$ROOT/build_support/lsan-suppressions.txt" + export LSAN_OPTIONS +} + +function run_test() { + # Run gtest style tests with sanitizers if they are setup appropriately. + + # gtest won't overwrite old junit test files, resulting in a build failure + # even when retries are successful. + rm -f $XMLFILE + + $TEST_EXECUTABLE "$@" 2>&1 \ + | ${PYTHON:-python3} $ROOT/build_support/asan_symbolize.py \ + | ${CXXFILT:-c++filt} \ + | $ROOT/build_support/stacktrace_addr2line.pl $TEST_EXECUTABLE \ + | $pipe_cmd 2>&1 | tee $LOGFILE + STATUS=$? + + # TSAN doesn't always exit with a non-zero exit code due to a bug: + # mutex errors don't get reported through the normal error reporting infrastructure. + # So we make sure to detect this and exit 1. 
+ # + # Additionally, certain types of failures won't show up in the standard JUnit + # XML output from gtest. We assume that gtest knows better than us and our + # regexes in most cases, but for certain errors we delete the resulting xml + # file and let our own post-processing step regenerate it. + export GREP=$(which egrep) + if zgrep --silent "ThreadSanitizer|Leak check.*detected leaks" $LOGFILE ; then + echo ThreadSanitizer or leak check failures in $LOGFILE + STATUS=1 + rm -f $XMLFILE + fi +} + +function print_coredumps() { + # The script expects core files relative to the build directory with unique + # names per test executable because of the parallel running. So the corefile + # patterns must be set with prefix `core.{test-executable}*`: + # + # In case of macOS: + # sudo sysctl -w kern.corefile=core.%N.%P + # On Linux: + # sudo sysctl -w kernel.core_pattern=core.%e.%p + # + # and the ulimit must be increased: + # ulimit -c unlimited + + # filename is truncated to the first 15 characters in case of linux, so limit + # the pattern for the first 15 characters + FILENAME=$(basename "${TEST_EXECUTABLE}") + FILENAME=$(echo ${FILENAME} | cut -c-15) + PATTERN="^core\.${FILENAME}" + + COREFILES=$(ls | grep $PATTERN) + if [ -n "$COREFILES" ]; then + echo "Found core dump, printing backtrace:" + + for COREFILE in $COREFILES; do + # Print backtrace + if [ "$(uname)" == "Darwin" ]; then + lldb -c "${COREFILE}" --batch --one-line "thread backtrace all -e true" + else + gdb -c "${COREFILE}" $TEST_EXECUTABLE -ex "thread apply all bt" -ex "set pagination 0" -batch + fi + # Remove the coredump, regenerate it via running the test case directly + rm "${COREFILE}" + done + fi +} + +function post_process_tests() { + # If we have a LeakSanitizer report, and XML reporting is configured, add a new test + # case result to the XML file for the leak report. Otherwise Jenkins won't show + # us which tests had LSAN errors. 
+ if zgrep --silent "ERROR: LeakSanitizer: detected memory leaks" $LOGFILE ; then + echo Test had memory leaks. Editing XML + perl -p -i -e ' + if (m##) { + print "\n"; + print " \n"; + print " See txt log file for details\n"; + print " \n"; + print "\n"; + }' $XMLFILE + fi +} + +function run_other() { + # Generic run function for test like executables that aren't actually gtest + $TEST_EXECUTABLE "$@" 2>&1 | $pipe_cmd > $LOGFILE + STATUS=$? +} + +if [ $RUN_TYPE = "test" ]; then + setup_sanitizers +fi + +# Run the actual test. +for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do + if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then + # If the test fails, the test output may or may not be left behind, + # depending on whether the test cleaned up or exited immediately. Either + # way we need to clean it up. We do this by comparing the data directory + # contents before and after the test runs, and deleting anything new. + # + # The comm program requires that its two inputs be sorted. + TEST_TMPDIR_BEFORE=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort) + fi + + if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then + # Now delete any new test output. + TEST_TMPDIR_AFTER=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort) + DIFF=$(comm -13 <(echo "$TEST_TMPDIR_BEFORE") \ + <(echo "$TEST_TMPDIR_AFTER")) + for DIR in $DIFF; do + # Multiple tests may be running concurrently. To avoid deleting the + # wrong directories, constrain to only directories beginning with the + # test name. + # + # This may delete old test directories belonging to this test, but + # that's not typically a concern when rerunning flaky tests. 
+ if [[ $DIR =~ ^$TEST_TMPDIR/$TEST_NAME ]]; then + echo Deleting leftover flaky test directory "$DIR" + rm -Rf "$DIR" + fi + done + fi + echo "Running $TEST_NAME, redirecting output into $LOGFILE" \ + "(attempt ${ATTEMPT_NUMBER}/$TEST_EXECUTION_ATTEMPTS)" + if [ $RUN_TYPE = "test" ]; then + run_test $* + else + run_other $* + fi + if [ "$STATUS" -eq "0" ]; then + break + elif [ "$ATTEMPT_NUMBER" -lt "$TEST_EXECUTION_ATTEMPTS" ]; then + echo Test failed attempt number $ATTEMPT_NUMBER + echo Will retry... + fi +done + +if [ $RUN_TYPE = "test" ]; then + post_process_tests +fi + +print_coredumps + +popd +rm -Rf $TEST_WORKDIR + +exit $STATUS diff --git a/build_support/run_clang_format.py b/build_support/run_clang_format.py new file mode 100755 index 0000000..5b63206 --- /dev/null +++ b/build_support/run_clang_format.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from __future__ import print_function
import lintutils
from subprocess import PIPE
import argparse
import difflib
import multiprocessing as mp
import sys
from functools import partial


# examine the output of clang-format and if changes are
# present assemble a (unified)patch of the difference
def _check_one_file(filename, formatted):
    """
    Compare a file on disk with clang-format's output for it.

    ``formatted`` is the raw stdout (bytes) of clang-format for
    ``filename``.  Returns (filename, diff) where diff is None when the
    bytes are identical, otherwise a list of unified-diff lines.
    """
    with open(filename, "rb") as reader:
        original = reader.read()

    if formatted == original:
        return filename, None

    # Run the equivalent of diff -u
    diff = list(difflib.unified_diff(
        original.decode('utf8').splitlines(True),
        formatted.decode('utf8').splitlines(True),
        fromfile=filename,
        tofile="{} (after clang format)".format(
            filename)))
    return filename, diff


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs clang-format on all of the source "
        "files. If --fix is specified enforce format by "
        "modifying in place, otherwise compare the output "
        "with the existing file and output any necessary "
        "changes as a patch in unified diff format")
    parser.add_argument("--clang_format_binary",
                        required=True,
                        help="Path to the clang-format binary")
    parser.add_argument("--exclude_globs",
                        help="Filename containing globs for files "
                        "that should be excluded from the checks")
    parser.add_argument("--source_dir",
                        required=True,
                        help="Root directory of the source code")
    parser.add_argument("--fix", default=False,
                        action="store_true",
                        help="If specified, will re-format the source "
                        "code instead of comparing the re-formatted "
                        "output, defaults to %(default)s")
    parser.add_argument("--quiet", default=False,
                        action="store_true",
                        help="If specified, only print errors")
    arguments = parser.parse_args()

    exclude_globs = []
    if arguments.exclude_globs:
        with open(arguments.exclude_globs) as f:
            exclude_globs.extend(line.strip() for line in f)

    formatted_filenames = [
        str(path)
        for path in lintutils.get_sources(arguments.source_dir,
                                          exclude_globs)
    ]

    if arguments.fix:
        if not arguments.quiet:
            print("\n".join(map("Formatting {}".format,
                                formatted_filenames)))

        # Break clang-format invocations into chunks: each invocation formats
        # 16 files. Wait for all processes to complete
        results = lintutils.run_parallel([
            [arguments.clang_format_binary, "-i"] + some
            for some in lintutils.chunk(formatted_filenames, 16)
        ])
        for returncode, stdout, stderr in results:
            # if any clang-format reported a parse error, bubble it
            if returncode != 0:
                sys.exit(returncode)

    else:
        # run an instance of clang-format for each source file in parallel,
        # then wait for all processes to complete
        results = lintutils.run_parallel([
            [arguments.clang_format_binary, filename]
            for filename in formatted_filenames
        ], stdout=PIPE, stderr=PIPE)

        checker_args = []
        for filename, res in zip(formatted_filenames, results):
            # if any clang-format reported a parse error, bubble it
            returncode, stdout, stderr = res
            if returncode != 0:
                # stderr was captured as bytes: decode it and report it on
                # stderr rather than printing a bytes repr to stdout.
                sys.stderr.write(stderr.decode('utf-8', 'replace'))
                sys.exit(returncode)
            checker_args.append((filename, stdout))

        error = False
        pool = mp.Pool()
        try:
            # check the output from each invocation of clang-format in parallel
            for filename, diff in pool.starmap(_check_one_file, checker_args):
                if not arguments.quiet:
                    print("Checking {}".format(filename))
                if diff:
                    print("{} had clang-format style issues".format(filename))
                    # Print out the diff to stderr
                    error = True
                    # pad with a newline
                    print(file=sys.stderr)
                    sys.stderr.writelines(diff)
        except Exception:
            error = True
            raise
        finally:
            pool.terminate()
            pool.join()
        sys.exit(1 if error else 0)
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import print_function
import argparse
from fnmatch import fnmatch
import multiprocessing as mp
import lintutils
from subprocess import PIPE
import sys
from functools import partial


def _get_chunk_key(filenames):
    """Return the dictionary key used to identify a chunk of file names."""
    # lists are not hashable so key on the first filename in a chunk
    return filenames[0]


# clang-tidy outputs complaints in '/path:line_number: complaint' format,
# so we can scan its output to get a list of files to fix
def _check_some_files(completed_processes, filenames):
    """
    Look up the finished clang-tidy run for the chunk ``filenames`` in
    ``completed_processes`` and return what
    lintutils.stdout_pathcolonline() extracts from it.
    """
    result = completed_processes[_get_chunk_key(filenames)]
    return lintutils.stdout_pathcolonline(result, filenames)


def _check_all(cmd, filenames):
    """
    Run ``cmd`` over ``filenames`` in chunks of 16 and report complaints.

    For every chunk whose output names at least one of its files, the
    affected files and the captured stdout are printed.  Exits the
    interpreter with status 1 when any chunk had complaints.
    """
    # each clang-tidy instance will process 16 files
    chunks = lintutils.chunk(filenames, 16)
    cmds = [cmd + some for some in chunks]
    results = lintutils.run_parallel(cmds, stderr=PIPE, stdout=PIPE)
    error = False
    # record completed processes (keyed by the first filename in the input
    # chunk) for lookup in _check_some_files
    completed_processes = {
        _get_chunk_key(some): result
        for some, result in zip(chunks, results)
    }
    checker = partial(_check_some_files, completed_processes)
    pool = mp.Pool()
    try:
        # check output of completed clang-tidy invocations in parallel
        for problem_files, stdout in pool.imap(checker, chunks):
            if problem_files:
                msg = "clang-tidy suggested fixes for {}"
                # sorted() keeps the report order stable between runs;
                # problem_files is a set.
                print("\n".join(map(msg.format, sorted(problem_files))))
                print(stdout)
                error = True
    except Exception:
        error = True
        raise
    finally:
        pool.terminate()
        pool.join()

    if error:
        sys.exit(1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs clang-tidy on all ",
        fromfile_prefix_chars="@"
    )
    parser.add_argument("--clang_tidy_binary",
                        required=True,
                        help="Path to the clang-tidy binary")
    parser.add_argument("--exclude_globs",
                        help="Filename containing globs for files "
                        "that should be excluded from the checks")
    parser.add_argument("--compile_commands",
                        required=True,
                        help="compile_commands.json to pass clang-tidy")
    path_group = parser.add_mutually_exclusive_group(required=True)
    path_group.add_argument("--source_dir",
                            nargs="+",
                            help="Root directory(s) of the source code")
    path_group.add_argument("--source_file",
                            nargs="*",
                            help="file path(s) of every source code")
    parser.add_argument("--extra_arg",
                        help="Extra arguments to pass to clang-tidy")
    parser.add_argument("--fix", default=False,
                        action="store_true",
                        help="If specified, will attempt to fix the "
                        "source code instead of recommending fixes, "
                        "defaults to %(default)s")
    parser.add_argument("--quiet", default=False,
                        action="store_true",
                        help="If specified, only print errors")
    parser.add_argument("--use_color", default=True,
                        action="store_true",
                        help="If specified, print errors in color")
    # --use_color defaults to True, so on its own it can never be switched
    # off; this companion flag writes False to the same destination.
    parser.add_argument("--no_use_color", dest="use_color",
                        action="store_false",
                        help="If specified, do not print errors in color")
    arguments = parser.parse_args()

    exclude_globs = []
    if arguments.exclude_globs:
        # Use a context manager so the exclusion file is closed promptly.
        with open(arguments.exclude_globs) as globs_file:
            for line in globs_file:
                exclude_globs.append(line.strip())

    linted_filenames = []
    if arguments.source_dir:
        for source_dir in arguments.source_dir:
            for path in lintutils.get_sources(source_dir, exclude_globs):
                linted_filenames.append(path)
    elif arguments.source_file:
        for path in arguments.source_file:
            if lintutils.need_do_lint(path, exclude_globs):
                linted_filenames.append(path)

    if not arguments.quiet:
        msg = 'Tidying {}' if arguments.fix else 'Checking {}'
        print("\n".join(map(msg.format, linted_filenames)))

    cmd = [
        arguments.clang_tidy_binary,
        '-p',
        arguments.compile_commands
    ]

    if arguments.extra_arg:
        cmd.append("--extra-arg={}".format(arguments.extra_arg))

    if arguments.use_color:
        cmd.append("--use-color")

    if arguments.fix:
        cmd.append('-fix')
        results = lintutils.run_parallel(
            [cmd + some for some in lintutils.chunk(linted_filenames, 16)])
        for returncode, stdout, stderr in results:
            if returncode != 0:
                sys.exit(returncode)
    else:
        _check_all(cmd, linted_filenames)
+# Seen error: +# thirdparty/gmock-1.7.0/include/gmock/gmock-spec-builders.h:1529:12: runtime error: member call on null pointer of type 'testing::internal::ActionResultHolder<void>' +fun:*testing*internal*InvokeWith* diff --git a/build_support/stacktrace_addr2line.pl b/build_support/stacktrace_addr2line.pl new file mode 100755 index 0000000..7664bab --- /dev/null +++ b/build_support/stacktrace_addr2line.pl @@ -0,0 +1,92 @@ +#!/usr/bin/perl +# Copyright 2014 Cloudera, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+####################################################################### +# This script will convert a stack trace with addresses: +# @ 0x5fb015 kudu::master::Master::Init() +# @ 0x5c2d38 kudu::master::MiniMaster::StartOnPorts() +# @ 0x5c31fa kudu::master::MiniMaster::Start() +# @ 0x58270a kudu::MiniCluster::Start() +# @ 0x57dc71 kudu::CreateTableStressTest::SetUp() +# To one with line numbers: +# @ 0x5fb015 kudu::master::Master::Init() at /home/mpercy/src/kudu/src/master/master.cc:54 +# @ 0x5c2d38 kudu::master::MiniMaster::StartOnPorts() at /home/mpercy/src/kudu/src/master/mini_master.cc:52 +# @ 0x5c31fa kudu::master::MiniMaster::Start() at /home/mpercy/src/kudu/src/master/mini_master.cc:33 +# @ 0x58270a kudu::MiniCluster::Start() at /home/mpercy/src/kudu/src/integration-tests/mini_cluster.cc:48 +# @ 0x57dc71 kudu::CreateTableStressTest::SetUp() at /home/mpercy/src/kudu/src/integration-tests/create-table-stress-test.cc:61 +# +# If the script detects that the output is not symbolized, it will also attempt +# to determine the function names, i.e. it will convert: +# @ 0x5fb015 +# @ 0x5c2d38 +# @ 0x5c31fa +# To: +# @ 0x5fb015 kudu::master::Master::Init() at /home/mpercy/src/kudu/src/master/master.cc:54 +# @ 0x5c2d38 kudu::master::MiniMaster::StartOnPorts() at /home/mpercy/src/kudu/src/master/mini_master.cc:52 +# @ 0x5c31fa kudu::master::MiniMaster::Start() at /home/mpercy/src/kudu/src/master/mini_master.cc:33 +####################################################################### +use strict; +use warnings; + +if (!@ARGV) { + die < is magical in Perl. +while (defined(my $input = )) { + if ($input =~ /^\s+\@\s+(0x[[:xdigit:]]{6,})(?:\s+(\S+))?/) { + my $addr = $1; + my $lookup_func_name = (!defined $2); + if (!exists($addr2line_map{$addr})) { + $addr2line_map{$addr} = `addr2line -ifC -e $binary $addr`; + } + chomp $input; + $input .= parse_addr2line_output($addr2line_map{$addr}, $lookup_func_name) . 
"\n"; + } + print $input; +} + +exit 0; diff --git a/build_support/tsan-suppressions.txt b/build_support/tsan-suppressions.txt new file mode 100644 index 0000000..13a8339 --- /dev/null +++ b/build_support/tsan-suppressions.txt @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/build_support/ubsan-suppressions.txt b/build_support/ubsan-suppressions.txt new file mode 100644 index 0000000..13a8339 --- /dev/null +++ b/build_support/ubsan-suppressions.txt @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. From 2afcb5a4cbf600283540b591d5c1fca3957b10ef Mon Sep 17 00:00:00 2001 From: "jinli.zjw" Date: Mon, 25 May 2026 15:39:15 +0800 Subject: [PATCH 2/4] chore(build): update build support license declaration --- LICENSE | 1 + 1 file changed, 1 insertion(+) diff --git a/LICENSE b/LICENSE index dbb46e5..8878158 100644 --- a/LICENSE +++ b/LICENSE @@ -249,6 +249,7 @@ This product includes code from Apache Arrow. * Core utilities: * docs utilities in docs/ directory + * build support utilities in build_support/ directory * basic utilities in - include/paimon/compare.h - include/paimon/macros.h From cd6c9367c91441697e80151ea48bd8a12ba4cb89 Mon Sep 17 00:00:00 2001 From: "jinli.zjw" Date: Mon, 25 May 2026 18:48:23 +0800 Subject: [PATCH 3/4] chore(build): refine build support license declarations --- LICENSE | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 8878158..118b65a 100644 --- a/LICENSE +++ b/LICENSE @@ -249,13 +249,26 @@ This product includes code from Apache Arrow. 
* Core utilities: * docs utilities in docs/ directory - * build support utilities in build_support/ directory * basic utilities in - include/paimon/compare.h - include/paimon/macros.h - include/paimon/status.h - include/paimon/string_builder.h - src/paimon/common/utils/status.cpp +* Build support utilities: + * build_support/asan-suppressions.txt + * build_support/get-upstream-commit.sh + * build_support/iwyu/iwyu-filter.awk + * build_support/iwyu/iwyu.sh + * build_support/iwyu/mappings/*.imp + * build_support/lint_exclusions.txt + * build_support/lintutils.py + * build_support/lsan-suppressions.txt + * build_support/run_clang_format.py + * build_support/run_clang_tidy.py + * build_support/sanitizer-disallowed-entries.txt + * build_support/tsan-suppressions.txt + * build_support/ubsan-suppressions.txt * Build system modules: * cmake_modules/BuildUtils.cmake * cmake_modules/DefineOptions.cmake @@ -265,4 +278,35 @@ This product includes code from Apache Arrow. Copyright: 2016-2024 The Apache Software Foundation. Home page: https://arrow.apache.org/ License: https://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This product includes code from LLVM compiler-rt. + +* AddressSanitizer symbolization utility: + * build_support/asan_symbolize.py + +Copyright: University of Illinois / LLVM contributors. +License: University of Illinois/NCSA Open Source License. + +-------------------------------------------------------------------------------- + +This product includes code from include-what-you-use. + +* IWYU driver utility: + * build_support/iwyu/iwyu_tool.py + +Copyright: 2003-2010 University of Illinois at Urbana-Champaign. +License: University of Illinois/NCSA Open Source License. + +-------------------------------------------------------------------------------- + +This product includes code from Apache Kudu / Cloudera build support utilities. 
+ +* Test and stacktrace utilities: + * build_support/run-test.sh + * build_support/stacktrace_addr2line.pl + +Copyright: 2014 Cloudera, Inc. +License: https://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- From 3b35c132d453c51dfac62e5c9f0e453c58383940 Mon Sep 17 00:00:00 2001 From: "jinli.zjw" Date: Mon, 25 May 2026 19:00:31 +0800 Subject: [PATCH 4/4] chore(build): add uiuc ncsa license text --- LICENSE | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/LICENSE b/LICENSE index d557e99..511feeb 100644 --- a/LICENSE +++ b/LICENSE @@ -302,6 +302,46 @@ This product includes code from include-what-you-use. Copyright: 2003-2010 University of Illinois at Urbana-Champaign. License: University of Illinois/NCSA Open Source License. +The University of Illinois/NCSA Open Source License + +Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + -------------------------------------------------------------------------------- This product includes code from Apache Kudu / Cloudera build support utilities.