diff --git a/recipes/arrow/all/CMakeLists.txt b/recipes/arrow/all/CMakeLists.txt new file mode 100644 index 0000000000000..5fce337b405db --- /dev/null +++ b/recipes/arrow/all/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 3.1) +project(cmake_wrapper) + +include(conanbuildinfo.cmake) +conan_basic_setup() + +add_subdirectory(source_subfolder/cpp) diff --git a/recipes/arrow/all/conandata.yml b/recipes/arrow/all/conandata.yml new file mode 100644 index 0000000000000..c6932a261c5fb --- /dev/null +++ b/recipes/arrow/all/conandata.yml @@ -0,0 +1,14 @@ +sources: + "1.0.0": + url: "https://github.com/apache/arrow/archive/apache-arrow-1.0.0.tar.gz" + sha256: "08fbd4c633c08939850d619ca0224c75d7a0526467c721c0838b8aa7efccb270" +patches: + "1.0.0": + - base_path: "source_subfolder" + patch_file: "patches/1.0.0-0001-cmake.patch" + - base_path: "source_subfolder" + patch_file: "patches/1.0.0-0002-jemalloc.patch" + - base_path: "source_subfolder" + patch_file: "patches/1.0.0-0003-fix-shared-msvc.patch" + - base_path: "source_subfolder" + patch_file: "patches/1.0.0-0004-mallctl-takes-size_t.patch" diff --git a/recipes/arrow/all/conanfile.py b/recipes/arrow/all/conanfile.py new file mode 100644 index 0000000000000..368fd39430bd0 --- /dev/null +++ b/recipes/arrow/all/conanfile.py @@ -0,0 +1,479 @@ +from conans import ConanFile, tools, CMake +from conans.errors import ConanInvalidConfiguration +import os + + +class ArrowConan(ConanFile): # Conan recipe that builds Apache Arrow through the exported CMakeLists.txt wrapper + name = "arrow" + description = "Apache Arrow is a cross-language development platform for in-memory data" + topics = ("conan", "arrow", "memory") + url = "https://github.com/conan-io/conan-center-index" + homepage = "https://arrow.apache.org/" + license = ("Apache-2.0",) + exports_sources = "CMakeLists.txt", "patches/**" + generators = "cmake", "cmake_find_package" + settings = "os", "compiler", "build_type", "arch" + options = { # allowed values per option; "auto" entries are resolved by the _compute() and _with_*() helpers + "shared": [True, False], + "fPIC": [True, False], + "gandiva": [True, False], + "parquet": [True, 
False], + "plasma": [True, False], + "cli": [True, False], + "compute": ["auto", True, False], + "dataset_modules": [True, False], + "deprecated": [True, False], + "encryption": [True, False], + "filesystem_layer": [True, False], + "hdfs_bridgs": [True, False], # NOTE(review): looks like a typo of "hdfs_bridge"; kept because the option name is public interface + "with_backtrace": [True, False], + "with_boost": ["auto", True, False], + "with_csv": [True, False], + "with_cuda": [True, False], + "with_flight_rpc": [True, False], + "with_gflags": ["auto", True, False], + "with_glog": ["auto", True, False], + "with_grpc": ["auto", True, False], + "with_hiveserver2": [True, False], + "with_jemalloc": ["auto", True, False], + "with_json": [True, False], + "with_llvm": ["auto", True, False], + "with_openssl": ["auto", True, False], + "with_orc": [True, False], + "with_protobuf": ["auto", True, False], + "with_re2": ["auto", True, False], + "with_s3": [True, False], + "with_utf8proc": ["auto", True, False], + "with_brotli": [True, False], + "with_bz2": [True, False], + "with_lz4": [True, False], + "with_snappy": [True, False], + "with_zlib": [True, False], + "with_zstd": [True, False], + } + default_options = { # every dependency-style option that offers "auto" defaults to it + "shared": False, + "fPIC": True, + "gandiva": False, + "parquet": False, + "plasma": False, + "cli": False, + "compute": "auto", + "dataset_modules": False, + "deprecated": True, + "encryption": False, + "filesystem_layer": False, + "hdfs_bridgs": False, + "with_backtrace": False, + "with_boost": "auto", + "with_brotli": False, + "with_bz2": False, + "with_csv": False, + "with_cuda": False, + "with_flight_rpc": False, + "with_gflags": "auto", + "with_jemalloc": "auto", + "with_glog": "auto", + "with_grpc": "auto", + "with_hiveserver2": False, + "with_json": False, + "with_llvm": "auto", + "with_openssl": "auto", + "with_orc": False, + "with_protobuf": "auto", + "with_re2": "auto", + "with_s3": False, + "with_utf8proc": "auto", + "with_lz4": False, + "with_snappy": False, + "with_zlib": False, + "with_zstd": False, + } + + _cmake = None # cached CMake helper, filled in by the first _configure_cmake() call + + @property + def 
_source_subfolder(self): + return "source_subfolder" + + def config_options(self): + if self.settings.os == "Windows": + del self.options.fPIC + + def configure(self): # rejects unsupported compilers and explicit False values for options that an enabled feature needs + if self.settings.compiler == "clang" and self.settings.compiler.version <= tools.Version("3.9"): + raise ConanInvalidConfiguration("This recipe does not support this compiler version") + + if self.options.shared: + del self.options.fPIC + if self.options.compute == False and self._compute(True): # same shape as the guards below: error only when compute is actually needed + raise ConanInvalidConfiguration("compute options is required (or choose auto)") + if self.options.with_jemalloc == False and self._with_jemalloc(True): + raise ConanInvalidConfiguration("with_jemalloc option is required (or choose auto)") + if self.options.with_re2 == False and self._with_re2(True): + raise ConanInvalidConfiguration("with_re2 option is required (or choose auto)") + if self.options.with_protobuf == False and self._with_protobuf(True): + raise ConanInvalidConfiguration("with_protobuf option is required (or choose auto)") + if self.options.with_gflags == False and self._with_gflags(True): + raise ConanInvalidConfiguration("with_gflags options is required (or choose auto)") + if self.options.with_grpc == False and self._with_grpc(True): + raise ConanInvalidConfiguration("with_grpc options is required (or choose auto)") + if self.options.with_boost == False and self._with_boost(True): + raise ConanInvalidConfiguration("with_boost options is required (or choose auto)") + if self.options.with_openssl == False and self._with_openssl(True): + raise ConanInvalidConfiguration("with_openssl options is required (or choose auto)") + if self.options.with_llvm == False and self._with_llvm(True): + raise ConanInvalidConfiguration("with_llvm options is required (or choose auto)") + + def _compute(self, required=False): # required=True ignores the user's value and reports whether dataset_modules needs compute + if required or self.options.compute == "auto": + return bool(self.options.dataset_modules) + else: + return bool(self.options.compute) + + def _with_jemalloc(self, required=False): 
+ if required or self.options.with_jemalloc == "auto": + return bool("BSD" in str(self.settings.os)) + else: + return bool(self.options.with_jemalloc) + + def _with_re2(self, required=False): + if required or self.options.with_re2 == "auto": + return bool(self.options.gandiva) + else: + return bool(self.options.with_re2) + + def _with_protobuf(self, required=False): + if required or self.options.with_protobuf == "auto": + return bool(self.options.gandiva or self.options.with_flight_rpc or self.options.with_orc) + else: + return bool(self.options.with_protobuf) + + def _with_gflags(self, required=False): + if required or self.options.with_gflags == "auto": + return bool(self.options.plasma or self._with_glog() or self._with_grpc()) + else: + return bool(self.options.with_gflags) + + def _with_glog(self, required=False): + if required or self.options.with_glog == "auto": + return False + else: + return bool(self.options.with_glog) + + def _with_grpc(self, required=False): + if required or self.options.with_grpc == "auto": + return bool(self.options.with_flight_rpc) + else: + return bool(self.options.with_grpc) + + def _with_boost(self, required=False): + if required or self.options.with_boost == "auto": + if self.options.gandiva: + return True + if self.options.parquet and self.settings.compiler == "gcc" and self.settings.compiler.version < tools.Version("4.9"): + return True + return False + else: + return bool(self.options.with_boost) + + def _with_thrift(self, required=False): + # No self.options.with_thrift exists + return bool(self.options.with_hiveserver2 or self.options.parquet) + + def _with_utf8proc(self, required=False): + if required or self.options.with_utf8proc == "auto": + return False + else: + return bool(self.options.with_utf8proc) + + def _with_llvm(self, required=False): # required=True or "auto": llvm is needed exactly when gandiva is enabled + if required or self.options.with_llvm == "auto": + return bool(self.options.gandiva) + else: + return bool(self.options.with_llvm) # explicit value of with_llvm itself, not with_openssl + + def _with_openssl(self, required=False): 
+ if required or self.options.with_openssl == "auto": + return bool(self.options.encryption or self.options.with_flight_rpc or self.options.with_s3) + else: + return bool(self.options.with_openssl) + + def build_requirements(self): # declares nothing; only rejects configurations that resolve to needing grpc + if self._with_grpc(): + raise ConanInvalidConfiguration("CCI has no grpc recipe (yet)") + + def requirements(self): # one requirement per enabled feature; features without a recipe raise ConanInvalidConfiguration + if self._with_thrift(): + self.requires("thrift/0.13.0") + if self.options.with_backtrace: + raise ConanInvalidConfiguration("CCI has no backtrace recipe (yet)") + if self._with_protobuf(): + self.requires("protobuf/3.11.4") + if self._with_jemalloc(): + self.requires("jemalloc/5.2.1") + if self._with_boost(): + self.requires("boost/1.72.0") + if self.options.with_cuda: + raise ConanInvalidConfiguration("CCI has no cuda recipe (yet)") + if self.options.with_flight_rpc: + raise ConanInvalidConfiguration("CCI has no flight_rpc recipe (yet)") + if self._with_gflags(): + self.requires("gflags/2.2.2") + if self._with_glog(): + self.requires("glog/0.4.0") + if self.options.with_hiveserver2: + raise ConanInvalidConfiguration("CCI has no hiveserver2 recipe (yet)") + if self.options.with_json: + self.requires("rapidjson/1.1.0") + if self._with_llvm(): # NOTE(review): _with_llvm() follows gandiva under "auto", so gandiva=True ends up here + raise ConanInvalidConfiguration("CCI has no llvm recipe (yet)") + if self._with_openssl(): + self.requires("openssl/1.1.1g") + if self.options.with_s3: + self.requires("aws-sdk-cpp/1.7.299") + if self.options.with_brotli: + self.requires("brotli/1.0.7") + if self.options.with_bz2: + self.requires("bzip2/1.0.8") + if self.options.with_orc: + raise ConanInvalidConfiguration("CCI has no orc recipe (yet)") + if self.options.with_lz4: + self.requires("lz4/1.9.2") + if self.options.with_snappy: + self.requires("snappy/1.1.8") + if self.options.with_zlib: + self.requires("zlib/1.2.11") + if self.options.with_zstd: + self.requires("zstd/1.4.4") + if self._with_re2(): + self.requires("re2/20200301") + + def source(self): # fetches the archive listed in conandata.yml and renames the extracted folder to _source_subfolder + tools.get(**self.conan_data["sources"][self.version]) + 
extracted_dir = "arrow-apache-arrow-{}".format(self.version) + os.rename(extracted_dir, self._source_subfolder) + + def _configure_cmake(self): # builds the CMake helper once, caches it in self._cmake and returns it + if self._cmake: + return self._cmake + self._cmake = CMake(self) + if self.settings.compiler == "Visual Studio": + self._cmake.definitions["ARROW_USE_STATIC_CRT"] = "MT" in str(self.settings.compiler.runtime) + self._cmake.definitions["ARROW_DEPENDENCY_SOURCE"] = "SYSTEM" # presumably stops Arrow from building bundled dependencies; confirm in the Arrow build docs + self._cmake.definitions["ARROW_GANDIVA"] = self.options.gandiva + self._cmake.definitions["ARROW_PARQUET"] = self.options.parquet + self._cmake.definitions["ARROW_PLASMA"] = self.options.plasma + self._cmake.definitions["ARROW_DATASET"] = self.options.dataset_modules + self._cmake.definitions["ARROW_FILESYSTEM"] = self.options.filesystem_layer + self._cmake.definitions["PARQUET_REQUIRE_ENCRYPTION"] = self.options.encryption + self._cmake.definitions["ARROW_HDFS"] = self.options.hdfs_bridgs + self._cmake.definitions["ARROW_VERBOSE_THIRDPARTY_BUILD"] = True + self._cmake.definitions["ARROW_BUILD_SHARED"] = self.options.shared + self._cmake.definitions["ARROW_BUILD_STATIC"] = not self.options.shared + self._cmake.definitions["ARROW_NO_DEPRECATED_API"] = not self.options.deprecated + self._cmake.definitions["ARROW_FLIGHT"] = self.options.with_flight_rpc + self._cmake.definitions["ARROW_HIVESERVER2"] = self.options.with_hiveserver2 + self._cmake.definitions["ARROW_COMPUTE"] = self._compute() + self._cmake.definitions["ARROW_CSV"] = self.options.with_csv + self._cmake.definitions["ARROW_CUDA"] = self.options.with_cuda + self._cmake.definitions["ARROW_JEMALLOC"] = self._with_jemalloc() + self._cmake.definitions["ARROW_JSON"] = self.options.with_json + + # self._cmake.definitions["ARROW_BOOST_VENDORED"] = False + self._cmake.definitions["BOOST_SOURCE"] = "SYSTEM" + self._cmake.definitions["Protobuf_SOURCE"] = "SYSTEM" + self._cmake.definitions["gRPC_SOURCE"] = "SYSTEM" + if self._with_protobuf(): + self._cmake.definitions["ARROW_PROTOBUF_USE_SHARED"] = 
self.options["protobuf"].shared + self._cmake.definitions["ARROW_HDFS"] = self.options.hdfs_bridgs + self._cmake.definitions["ARROW_USE_GLOG"] = self._with_glog() + self._cmake.definitions["GLOG_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_WITH_BACKTRACE"] = self.options.with_backtrace + self._cmake.definitions["ARROW_WITH_BROTLI"] = self.options.with_brotli + self._cmake.definitions["Brotli_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_BROTLI_USE_SHARED"] = bool(self.options.with_brotli) and bool(self.options["brotli"].shared) # true only when brotli is enabled and its package is shared; the old expression tested membership and negated shared + self._cmake.definitions["gflags_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_WITH_BZ2"] = self.options.with_bz2 + self._cmake.definitions["BZip2_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_WITH_LZ4"] = self.options.with_lz4 + self._cmake.definitions["Lz4_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_WITH_SNAPPY"] = self.options.with_snappy + self._cmake.definitions["Snappy_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_WITH_ZLIB"] = self.options.with_zlib + self._cmake.definitions["RE2_SOURCE"] = "SYSTEM" + self._cmake.definitions["ZLIB_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_WITH_ZSTD"] = self.options.with_zstd + self._cmake.definitions["ZSTD_SOURCE"] = "SYSTEM" + self._cmake.definitions["ORC_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_WITH_THRIFT"] = self._with_thrift() + self._cmake.definitions["Thrift_SOURCE"] = "SYSTEM" + self._cmake.definitions["THRIFT_VERSION"] = "1.0" # a recent thrift does not require boost + self._cmake.definitions["ARROW_USE_OPENSSL"] = self._with_openssl() + if self._with_openssl(): + self._cmake.definitions["OPENSSL_ROOT_DIR"] = self.deps_cpp_info["openssl"].rootpath.replace("\\", "/") + if self._with_boost(): + self._cmake.definitions["ARROW_BOOST_USE_SHARED"] = self.options["boost"].shared + self._cmake.definitions["ARROW_S3"] = self.options.with_s3 + self._cmake.definitions["AWSSDK_SOURCE"] = "SYSTEM" + + 
self._cmake.definitions["ARROW_BUILD_UTILITIES"] = self.options.cli + self._cmake.definitions["ARROW_BUILD_INTEGRATION"] = False + self._cmake.definitions["ARROW_INSTALL_NAME_RPATH"] = False + self._cmake.definitions["ARROW_BUILD_EXAMPLES"] = False + self._cmake.definitions["ARROW_BUILD_TESTS"] = False + self._cmake.definitions["ARROW_ENABLE_TIMING_TESTS"] = False + self._cmake.definitions["ARROW_BUILD_BENCHMARKS"] = False + + self._cmake.definitions["LLVM_SOURCE"] = "SYSTEM" + self._cmake.definitions["ARROW_WITH_UTF8PROC"] = self._with_utf8proc() + self._cmake.definitions["utf8proc_SOURCE"] = "SYSTEM" + + if self.settings.compiler == "Visual Studio": + self._cmake.definitions["ARROW_USE_STATIC_CRT"] = "MT" in str(self.settings.compiler.runtime) + + if self._with_llvm(): + self._cmake.definitions["LLVM_DIR"] = self.deps_cpp_info["llvm"].rootpath.replace("\\", "/") + + self._cmake.configure() + return self._cmake + + def _patch_sources(self): # applies every patch listed for this version in conandata.yml + for patch in self.conan_data["patches"][self.version]: + tools.patch(**patch) + + def build(self): # validates the jemalloc combination, patches the sources, then runs the CMake build + if self.options.shared and self._with_jemalloc(): + if self.options["jemalloc"].enable_cxx: + raise ConanInvalidConfiguration("jemalloc.enable_cxx of a static jemalloc must be disabled") + + self._patch_sources() + cmake = self._configure_cmake() + cmake.build() + + def package(self): # copies the license files, installs, then removes the cmake, pkgconfig and share folders + self.copy("LICENSE.txt", src=self._source_subfolder, dst="licenses") + self.copy("NOTICE.txt", src=self._source_subfolder, dst="licenses") + cmake = self._configure_cmake() + cmake.install() + + tools.rmdir(os.path.join(self.package_folder, "lib", "cmake")) + tools.rmdir(os.path.join(self.package_folder, "lib", "pkgconfig")) + tools.rmdir(os.path.join(self.package_folder, "share")) + + def _lib_name(self, name): # static Visual Studio builds get a "_static" suffix on the library name + if self.settings.compiler == "Visual Studio" and not self.options.shared: + return "{}_static".format(name) + else: + return "{}".format(name) + + def package_id(self): # stores the resolved values of "auto" options in self.info.options + self.info.options.with_jemalloc = self._with_jemalloc() # write to the package-id info, not to the recipe options + 
self.info.options.with_gflags = self._with_gflags() + self.info.options.with_protobuf = self._with_protobuf() + self.info.options.with_re2 = self._with_re2() + self.info.options.with_jemalloc = self._with_jemalloc() + self.info.options.with_openssl = self._with_openssl() + self.info.options.with_boost = self._with_boost() + self.info.options.with_glog = self._with_glog() + self.info.options.with_grpc = self._with_grpc() + + def package_info(self): # declares one component per built library, then wires each enabled dependency into its requires + self.cpp_info.components["libarrow"].libs = [self._lib_name("arrow")] + self.cpp_info.components["libarrow"].filenames["cmake_find_package"] = "Arrow" + self.cpp_info.components["libarrow"].filenames["cmake_find_package_multi"] = "Arrow" + self.cpp_info.components["libarrow"].names["cmake_find_package"] = "arrow" + self.cpp_info.components["libarrow"].names["cmake_find_package_multi"] = "arrow" + self.cpp_info.components["libarrow"].names["pkg_config"] = "arrow" + if not self.options.shared: + self.cpp_info.components["libarrow"].defines = ["ARROW_STATIC"] + if self.settings.os == "Linux": + self.cpp_info.components["libarrow"].system_libs = ["pthread"] + + + if self.options.parquet: + self.cpp_info.components["libparquet"].libs = [self._lib_name("parquet")] + self.cpp_info.components["libparquet"].filenames["cmake_find_package"] = "Parquet" + self.cpp_info.components["libparquet"].filenames["cmake_find_package_multi"] = "Parquet" + self.cpp_info.components["libparquet"].names["cmake_find_package"] = "parquet" + self.cpp_info.components["libparquet"].names["cmake_find_package_multi"] = "parquet" + self.cpp_info.components["libparquet"].names["pkg_config"] = "parquet" + self.cpp_info.components["libparquet"].requires = ["libarrow"] + + if self.options.plasma: + self.cpp_info.components["libplasma"].libs = [self._lib_name("plasma")] + self.cpp_info.components["libplasma"].filenames["cmake_find_package"] = "Plasma" + self.cpp_info.components["libplasma"].filenames["cmake_find_package_multi"] = "Plasma" # same file name as the cmake_find_package entry above, not libarrow's +
self.cpp_info.components["libplasma"].names["cmake_find_package"] = "plasma" + self.cpp_info.components["libplasma"].names["cmake_find_package_multi"] = "plasma" + self.cpp_info.components["libplasma"].names["pkg_config"] = "plasma" + self.cpp_info.components["libplasma"].requires = ["libarrow"] + + if self.options.gandiva: + self.cpp_info.components["libgandiva"].libs = [self._lib_name("gandiva")] + self.cpp_info.components["libgandiva"].filenames["cmake_find_package"] = "Gandiva" + self.cpp_info.components["libgandiva"].filenames["cmake_find_package_multi"] = "Gandiva" + self.cpp_info.components["libgandiva"].names["cmake_find_package"] = "gandiva" + self.cpp_info.components["libgandiva"].names["cmake_find_package_multi"] = "gandiva" # was "plasma", copied from the block above + self.cpp_info.components["libgandiva"].names["pkg_config"] = "gandiva" # was "plasma", copied from the block above + self.cpp_info.components["libgandiva"].requires = ["libarrow"] + + if self.options.dataset_modules: + self.cpp_info.components["dataset"].libs = ["arrow_dataset"] + + if self.options.cli: + binpath = os.path.join(self.package_folder, "bin") + self.output.info("Appending PATH env var: {}".format(binpath)) + self.env_info.PATH.append(binpath) + + if self._with_boost(): + if self.options.gandiva: + # FIXME: only filesystem component is used + self.cpp_info.components["libgandiva"].requires.append("boost::boost") + if self.options.parquet and self.settings.compiler == "gcc" and self.settings.compiler.version < tools.Version("4.9"): + self.cpp_info.components["libparquet"].requires.append("boost::boost") + if self._with_openssl(): + self.cpp_info.components["libarrow"].requires.append("openssl::openssl") + if self._with_gflags(): + self.cpp_info.components["libarrow"].requires.append("gflags::gflags") + if self._with_glog(): + self.cpp_info.components["libarrow"].requires.append("glog::glog") + if self._with_jemalloc(): + self.cpp_info.components["libarrow"].requires.append("jemalloc::jemalloc") + if self._with_re2(): +
self.cpp_info.components["libgandiva"].requires.append("re2::re2") + if self._with_protobuf(): + self.cpp_info.components["libarrow"].requires.append("protobuf::protobuf") + if self._with_utf8proc(): + self.cpp_info.components["libarrow"].requires.append("utf8proc::utf8proc") # was misspelled "uff8proc" + if self._with_llvm(): + self.cpp_info.components["libarrow"].requires.append("llvm::llvm") + if self._with_thrift(): + self.cpp_info.components["libarrow"].requires.append("thrift::thrift") + + if self.options.with_backtrace: + self.cpp_info.components["libarrow"].requires.append("backtrace::backtrace") + if self.options.with_cuda: + self.cpp_info.components["libarrow"].requires.append("cuda::cuda") + if self.options.with_flight_rpc: + self.cpp_info.components["libarrow"].requires.append("flight::flight") + if self.options.with_hiveserver2: + self.cpp_info.components["libarrow"].requires.append("hiveserver2::hiveserver2") + if self.options.with_json: + self.cpp_info.components["libarrow"].requires.append("rapidjson::rapidjson") + if self.options.with_s3: + self.cpp_info.components["libarrow"].requires.append("aws-sdk-cpp::filesystem") + if self.options.with_orc: + self.cpp_info.components["libarrow"].requires.append("orc::orc") + if self.options.with_brotli: + self.cpp_info.components["libarrow"].requires.append("brotli::brotli") + if self.options.with_bz2: + self.cpp_info.components["libarrow"].requires.append("bzip2::bzip2") # package is required as bzip2/1.0.8, so the prefix is bzip2 + if self.options.with_lz4: + self.cpp_info.components["libarrow"].requires.append("lz4::lz4") + if self.options.with_snappy: + self.cpp_info.components["libarrow"].requires.append("snappy::snappy") + if self.options.with_zlib: + self.cpp_info.components["libarrow"].requires.append("zlib::zlib") + if self.options.with_zstd: + self.cpp_info.components["libarrow"].requires.append("zstd::zstd") diff --git a/recipes/arrow/all/patches/1.0.0-0001-cmake.patch b/recipes/arrow/all/patches/1.0.0-0001-cmake.patch new file mode 100644 index 0000000000000..30255e99a5007 --- /dev/null 
+++ b/recipes/arrow/all/patches/1.0.0-0001-cmake.patch @@ -0,0 +1,151 @@ +--- cpp/CMakeLists.txt ++++ cpp/CMakeLists.txt +@@ -654,7 +654,7 @@ endif() + + if(ARROW_WITH_BROTLI) + # Order is important for static linking +- set(ARROW_BROTLI_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) ++ set(ARROW_BROTLI_LIBS Brotli::Brotli) + list(APPEND ARROW_LINK_LIBS ${ARROW_BROTLI_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_BROTLI_LIBS}) + endif() +@@ -668,8 +668,8 @@ if(ARROW_WITH_LZ4) + endif() + + if(ARROW_WITH_SNAPPY) +- list(APPEND ARROW_STATIC_LINK_LIBS Snappy::snappy) ++ list(APPEND ARROW_STATIC_LINK_LIBS Snappy::Snappy) + endif() + + if(ARROW_WITH_ZLIB) + list(APPEND ARROW_STATIC_LINK_LIBS ZLIB::ZLIB) +@@ -685,9 +685,9 @@ if(ARROW_ORC) + endif() + + if(ARROW_USE_GLOG) +- list(APPEND ARROW_LINK_LIBS glog::glog) +- list(APPEND ARROW_STATIC_LINK_LIBS glog::glog) ++ list(APPEND ARROW_LINK_LIBS GLOG::GLOG) ++ list(APPEND ARROW_STATIC_LINK_LIBS GLOG::GLOG) + add_definitions("-DARROW_USE_GLOG") + endif() + + if(ARROW_S3) +--- cpp/cmake_modules/DefineOptions.cmake ++++ cpp/cmake_modules/DefineOptions.cmake +@@ -76,7 +76,7 @@ macro(define_option_string name description default) + endmacro() + + # Top level cmake dir +-if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") ++if(1) + #---------------------------------------------------------------------- + set_option_category("Compile and link") + +--- cpp/cmake_modules/ThirdpartyToolchain.cmake ++++ cpp/cmake_modules/ThirdpartyToolchain.cmake +@@ -854,7 +854,7 @@ if(ARROW_WITH_SNAPPY) + # location. 
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1679727 + # https://src.fedoraproject.org/rpms/snappy/pull-request/1 +- find_package(Snappy QUIET HINTS "${CMAKE_ROOT}/Modules/") ++ find_package(Snappy REQUIRED) + if(NOT Snappy_FOUND) + find_package(SnappyAlt) + endif() +@@ -866,14 +866,14 @@ if(ARROW_WITH_SNAPPY) + elseif(Snappy_SOURCE STREQUAL "SYSTEM") + # SnappyConfig.cmake is not installed on Ubuntu/Debian + # TODO: Make a bug report upstream +- find_package(Snappy HINTS "${CMAKE_ROOT}/Modules/") ++ find_package(Snappy REQUIRED) + if(NOT Snappy_FOUND) + find_package(SnappyAlt REQUIRED) + endif() + endif() + + # TODO: Don't use global includes but rather target_include_directories +- get_target_property(SNAPPY_INCLUDE_DIRS Snappy::snappy INTERFACE_INCLUDE_DIRECTORIES) ++ get_target_property(SNAPPY_INCLUDE_DIRS Snappy::Snappy INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS}) + endif() + +@@ -938,7 +938,7 @@ endmacro() + if(ARROW_WITH_BROTLI) + resolve_dependency(Brotli) + # TODO: Don't use global includes but rather target_include_directories +- get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon ++ get_target_property(BROTLI_INCLUDE_DIR Brotli::Brotli + INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${BROTLI_INCLUDE_DIR}) + endif() +@@ -1057,7 +1057,7 @@ endmacro() + if(ARROW_USE_GLOG) + resolve_dependency(GLOG) + # TODO: Don't use global includes but rather target_include_directories +- get_target_property(GLOG_INCLUDE_DIR glog::glog INTERFACE_INCLUDE_DIRECTORIES) ++ get_target_property(GLOG_INCLUDE_DIR GLOG::GLOG INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${GLOG_INCLUDE_DIR}) + endif() + +@@ -1139,8 +1139,8 @@ + build_gflags() + elseif(gflags_SOURCE STREQUAL "SYSTEM") +- # gflagsConfig.cmake is not installed on Ubuntu/Debian +- # TODO: Make a bug report upstream +- find_package(gflags ${ARROW_GFLAGS_REQUIRED_VERSION}) ++ find_package(gflags REQUIRED) ++ add_library(gflags-shared INTERFACE) 
++ target_link_libraries(gflags-shared INTERFACE gflags::gflags) + if(NOT gflags_FOUND) + find_package(gflagsAlt ${ARROW_GFLAGS_REQUIRED_VERSION} REQUIRED) + endif() +@@ -1329,6 +1329,6 @@ macro(build_protobuf) + endmacro() +- + if(ARROW_WITH_PROTOBUF) ++ find_package(Protobuf REQUIRED) + if(ARROW_WITH_GRPC) + # gRPC 1.21.0 or later require Protobuf 3.7.0 or later. + set(ARROW_PROTOBUF_REQUIRED_VERSION "3.7.0") +@@ -1365,9 +1365,9 @@ if(ARROW_WITH_PROTOBUF) + set(ARROW_PROTOBUF_LIBPROTOC arrow::protobuf::libprotoc) + else() + if(NOT TARGET protobuf::libprotoc) ++ set(Protobuf_PROTOC_LIBRARY protoc) + if(PROTOBUF_PROTOC_LIBRARY AND NOT Protobuf_PROTOC_LIBRARY) +- # Old CMake versions have a different casing. +- set(Protobuf_PROTOC_LIBRARY ${PROTOBUF_PROTOC_LIBRARY}) ++ set(Protobuf_PROTOC_LIBRARY protoc) + endif() + if(NOT Protobuf_PROTOC_LIBRARY) + message(FATAL_ERROR "libprotoc was set to ${Protobuf_PROTOC_LIBRARY}") +@@ -1802,7 +1802,7 @@ if(ARROW_WITH_RAPIDJSON) + elseif(RapidJSON_SOURCE STREQUAL "SYSTEM") + # Fedora packages place the package information at the wrong location. 
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1680400 +- find_package(RapidJSON ${ARROW_RAPIDJSON_REQUIRED_VERSION} HINTS "${CMAKE_ROOT}") ++ find_package(RapidJSON REQUIRED) + if(RapidJSON_FOUND) + set(RAPIDJSON_INCLUDE_DIR ${RAPIDJSON_INCLUDE_DIRS}) + else() +@@ -2088,7 +2088,7 @@ if(ARROW_WITH_BZ2) + PROPERTIES IMPORTED_LOCATION "${BZIP2_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") + endif() +- include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") ++ include_directories(SYSTEM "${BZip2_INCLUDE_DIR}") + endif() + + macro(build_utf8proc) +--- cpp/cmake_modules/SetupCxxFlags.cmake ++++ cpp/cmake_modules/SetupCxxFlags.cmake +@@ -188,7 +188,7 @@ + message(STATUS "Arrow build warning level: ${BUILD_WARNING_LEVEL}") + + macro(arrow_add_werror_if_debug) +- if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") ++ if(0) + # Treat all compiler warnings as errors + if(MSVC) + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /WX") diff --git a/recipes/arrow/all/patches/1.0.0-0002-jemalloc.patch b/recipes/arrow/all/patches/1.0.0-0002-jemalloc.patch new file mode 100644 index 0000000000000..7e8bd1ed08039 --- /dev/null +++ b/recipes/arrow/all/patches/1.0.0-0002-jemalloc.patch @@ -0,0 +1,43 @@ +--- cpp/cmake_modules/ThirdpartyToolchain.cmake ++++ cpp/cmake_modules/ThirdpartyToolchain.cmake +@@ -1407,6 +1407,6 @@ endif() + # jemalloc - Unix-only high-performance allocator +- + if(ARROW_JEMALLOC) ++if(0) + message(STATUS "Building (vendored) jemalloc from source") + # We only use a vendored jemalloc as we want to control its version. 
+ # Also our build of jemalloc is specially prefixed so that it will not +@@ -1465,6 +1465,8 @@ if(ARROW_JEMALLOC) + add_dependencies(jemalloc::jemalloc jemalloc_ep) + + list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc::jemalloc) ++else() ++ find_package(jemalloc REQUIRED) ++endif() + endif() +- + # ---------------------------------------------------------------------- + # mimalloc - Cross-platform high-performance allocator, from Microsoft +--- cpp/src/arrow/CMakeLists.txt ++++ cpp/src/arrow/CMakeLists.txt +@@ -292,7 +292,7 @@ + + set(_allocator_dependencies "") # Empty list + if(ARROW_JEMALLOC) +- list(APPEND _allocator_dependencies jemalloc_ep) ++ list(APPEND _allocator_dependencies jemalloc::jemalloc) + endif() + if(ARROW_MIMALLOC) + list(APPEND _allocator_dependencies mimalloc_ep) +--- cpp/src/arrow/memory_pool.cc ++++ cpp/src/arrow/memory_pool.cc +@@ -31,7 +31,7 @@ + // Needed to support jemalloc 3 and 4 + #define JEMALLOC_MANGLE + // Explicitly link to our version of jemalloc +-#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h" ++#include "jemalloc/jemalloc.h" + #endif + + #ifdef ARROW_MIMALLOC diff --git a/recipes/arrow/all/patches/1.0.0-0003-fix-shared-msvc.patch b/recipes/arrow/all/patches/1.0.0-0003-fix-shared-msvc.patch new file mode 100644 index 0000000000000..3c7e86d5ff279 --- /dev/null +++ b/recipes/arrow/all/patches/1.0.0-0003-fix-shared-msvc.patch @@ -0,0 +1,13 @@ +--- cpp/src/arrow/CMakeLists.txt ++++ cpp/src/arrow/CMakeLists.txt +@@ -490,6 +490,10 @@ + target_compile_definitions(arrow_static PUBLIC ARROW_STATIC) + endif() + ++if(ARROW_BUILD_SHARED AND WIN32) ++target_compile_definitions(arrow_shared PRIVATE ARROW_EXPORTING) ++endif() ++ + if(ARROW_WITH_BACKTRACE) + find_package(Backtrace) + diff --git a/recipes/arrow/all/patches/1.0.0-0004-mallctl-takes-size_t.patch b/recipes/arrow/all/patches/1.0.0-0004-mallctl-takes-size_t.patch new file mode 100644 index 0000000000000..e9ce3546d355f --- /dev/null +++ 
b/recipes/arrow/all/patches/1.0.0-0004-mallctl-takes-size_t.patch @@ -0,0 +1,11 @@ +--- cpp/src/arrow/memory_pool.cc ++++ cpp/src/arrow/memory_pool.cc +@@ -427,7 +427,7 @@ + + Status jemalloc_set_decay_ms(int ms) { + #ifdef ARROW_JEMALLOC +- ssize_t decay_time_ms = static_cast<ssize_t>(ms); ++ size_t decay_time_ms = static_cast<size_t>(ms); + + int err = mallctl("arenas.dirty_decay_ms", nullptr, nullptr, &decay_time_ms, + sizeof(decay_time_ms)); diff --git a/recipes/arrow/all/test_package/CMakeLists.txt b/recipes/arrow/all/test_package/CMakeLists.txt new file mode 100644 index 0000000000000..0770e4cb69ded --- /dev/null +++ b/recipes/arrow/all/test_package/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.1) +project(test_package) + +include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) +conan_basic_setup() + +add_executable(${PROJECT_NAME} test_package.cpp) +target_link_libraries(${PROJECT_NAME} ${CONAN_LIBS}) +set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11) +target_compile_definitions(${PROJECT_NAME} PRIVATE WITH_JEMALLOC) diff --git a/recipes/arrow/all/test_package/conanfile.py b/recipes/arrow/all/test_package/conanfile.py new file mode 100644 index 0000000000000..bd7165a553cf4 --- /dev/null +++ b/recipes/arrow/all/test_package/conanfile.py @@ -0,0 +1,17 @@ +from conans import ConanFile, CMake, tools +import os + + +class TestPackageConan(ConanFile): # builds test_package.cpp against the packaged library and runs it + settings = "os", "compiler", "build_type", "arch" + generators = "cmake" + + def build(self): + cmake = CMake(self) + cmake.configure() + cmake.build() + + def test(self): # the binary is only executed when not cross building + if not tools.cross_building(self.settings): + bin_path = os.path.join("bin", "test_package") + self.run(bin_path, run_environment=True) diff --git a/recipes/arrow/all/test_package/test_package.cpp b/recipes/arrow/all/test_package/test_package.cpp new file mode 100644 index 0000000000000..42cab6cc76e7a --- /dev/null +++ b/recipes/arrow/all/test_package/test_package.cpp @@ -0,0 +1,190 @@ +// Licensed to the Apache Software Foundation 
(ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstdint> +#include <iostream> +#include <vector> + +#include <arrow/api.h> + +using arrow::DoubleBuilder; +using arrow::Int64Builder; +using arrow::ListBuilder; + +// While we want to use columnar data structures to build efficient operations, we +// often receive data in a row-wise fashion from other systems. In the following, +// we want to give a brief introduction into the classes provided by Apache Arrow by +// showing how to transform row-wise data into a columnar table. +// +// The data in this example is stored in the following struct: +struct data_row { + int64_t id; + double cost; + std::vector<double> cost_components; +}; + +// Transforming a vector of structs into a columnar Table. +// +// The final representation should be an `arrow::Table` which in turn +// is made up of an `arrow::Schema` and a list of +// `arrow::ChunkedArray` instances. As the first step, we will iterate +// over the data and build up the arrays incrementally. For this +// task, we provide `arrow::ArrayBuilder` classes that help in the +// construction of the final `arrow::Array` instances. +// +// For each type, Arrow has a specially typed builder class.
For the primitive +// values `id` and `cost` we can use the respective `arrow::Int64Builder` and +// `arrow::DoubleBuilder`. For the `cost_components` vector, we need to have two +// builders, a top-level `arrow::ListBuilder` that builds the array of offsets and +// a nested `arrow::DoubleBuilder` that constructs the underlying values array that +// is referenced by the offsets in the former array. +arrow::Status VectorToColumnarTable(const std::vector<data_row>& rows, + std::shared_ptr<arrow::Table>* table) { + // The builders are more efficient using + // arrow::jemalloc::MemoryPool::default_pool() as this can increase the size of + // the underlying memory regions in-place. At the moment, arrow::jemalloc is only + // supported on Unix systems, not Windows. + arrow::MemoryPool* pool = arrow::default_memory_pool(); + + Int64Builder id_builder(pool); + DoubleBuilder cost_builder(pool); + ListBuilder components_builder(pool, std::make_shared<DoubleBuilder>(pool)); + // The following builder is owned by components_builder. + DoubleBuilder& cost_components_builder = + *(static_cast<DoubleBuilder*>(components_builder.value_builder())); + + // Now we can loop over our existing data and insert it into the builders. The + // `Append` calls here may fail (e.g. we cannot allocate enough additional memory). + // Thus we need to check their return values. For more information on these values, + // check the documentation about `arrow::Status`. + for (const data_row& row : rows) { + ARROW_RETURN_NOT_OK(id_builder.Append(row.id)); + ARROW_RETURN_NOT_OK(cost_builder.Append(row.cost)); + + // Indicate the start of a new list row. This will memorise the current + // offset in the values builder. + ARROW_RETURN_NOT_OK(components_builder.Append()); + // Store the actual values. No validity bitmap is passed, so the underlying + // builder treats all added values as valid, i.e. non-null.
+ ARROW_RETURN_NOT_OK(cost_components_builder.AppendValues(row.cost_components.data(), + row.cost_components.size())); + } + + // At the end, we finalise the arrays, declare the (type) schema and combine them + // into a single `arrow::Table`: + std::shared_ptr<arrow::Array> id_array; + ARROW_RETURN_NOT_OK(id_builder.Finish(&id_array)); + std::shared_ptr<arrow::Array> cost_array; + ARROW_RETURN_NOT_OK(cost_builder.Finish(&cost_array)); + // No need to invoke cost_components_builder.Finish because it is implied by + // the parent builder's Finish invocation. + std::shared_ptr<arrow::Array> cost_components_array; + ARROW_RETURN_NOT_OK(components_builder.Finish(&cost_components_array)); + + std::vector<std::shared_ptr<arrow::Field>> schema_vector = { + arrow::field("id", arrow::int64()), arrow::field("cost", arrow::float64()), + arrow::field("cost_components", arrow::list(arrow::float64()))}; + + auto schema = std::make_shared<arrow::Schema>(schema_vector); + + // The final `table` variable is the one we then can pass on to other functions + // that can consume Apache Arrow memory structures. This object has ownership of + // all referenced data, thus we don't have to care about undefined references once + // we leave the scope of the function building the table and its underlying arrays. + *table = arrow::Table::Make(schema, {id_array, cost_array, cost_components_array}); + + return arrow::Status::OK(); +} + +arrow::Status ColumnarTableToVector(const std::shared_ptr<arrow::Table>& table, + std::vector<data_row>* rows) { + // To convert an Arrow table back into the same row-wise representation as in the + // above section, we first will check that the table conforms to our expected + // schema and then will build up the vector of rows incrementally. + // + // For the check if the table is as expected, we can utilise solely its schema.
+ std::vector<std::shared_ptr<arrow::Field>> schema_vector = { + arrow::field("id", arrow::int64()), arrow::field("cost", arrow::float64()), + arrow::field("cost_components", arrow::list(arrow::float64()))}; + auto expected_schema = std::make_shared<arrow::Schema>(schema_vector); + + if (!expected_schema->Equals(*table->schema())) { + // The table doesn't have the expected schema thus we cannot directly + // convert it to our target representation. + return arrow::Status::Invalid("Schemas are not matching!"); + } + + // As we have ensured that the table has the expected structure, we can unpack the + // underlying arrays. For the primitive columns `id` and `cost` we can use the high + // level functions to get the values whereas for the nested column + // `cost_components` we need to access the C-pointer to the data to copy its + // contents into the resulting `std::vector<double>`. Here we need to be careful to + // also add the offset to the pointer. This offset is needed to enable zero-copy + // slicing operations. While this could be adjusted automatically for double + // arrays, this cannot be done for the accompanying bitmap as often the slicing + // border would be inside a byte. + + auto ids = + std::static_pointer_cast<arrow::Int64Array>(table->column(0)->chunk(0)); + auto costs = + std::static_pointer_cast<arrow::DoubleArray>(table->column(1)->chunk(0)); + auto cost_components = + std::static_pointer_cast<arrow::ListArray>(table->column(2)->chunk(0)); + auto cost_components_values = + std::static_pointer_cast<arrow::DoubleArray>(cost_components->values()); + // To enable zero-copy slices, the native values pointer might need to account + // for this slicing offset. This is not needed for the higher level functions + // like Value(…) that already account for this offset internally. 
+ const double* ccv_ptr = cost_components_values->data()->GetValues<double>(1); + + for (int64_t i = 0; i < table->num_rows(); i++) { + // Another simplification in this example is that we assume that there are + // no null entries, e.g. each row is filled with valid values.
+ int64_t id = ids->Value(i); + double cost = costs->Value(i); + const double* first = ccv_ptr + cost_components->value_offset(i); + const double* last = ccv_ptr + cost_components->value_offset(i + 1); + std::vector<double> components_vec(first, last); + rows->push_back({id, cost, components_vec}); + } + + return arrow::Status::OK(); +} + +#define EXIT_ON_FAILURE(expr) \ + do { \ + arrow::Status status_ = (expr); \ + if (!status_.ok()) { \ + std::cerr << status_.message() << std::endl; \ + return EXIT_FAILURE; \ + } \ + } while (0) + +int main(int argc, char** argv) { + std::vector<data_row> rows = { + {1, 1.0, {1.0}}, {2, 2.0, {1.0, 2.0}}, {3, 3.0, {1.0, 2.0, 3.0}}}; + + std::shared_ptr<arrow::Table> table; + EXIT_ON_FAILURE(VectorToColumnarTable(rows, &table)); + + std::vector<data_row> expected_rows; + EXIT_ON_FAILURE(ColumnarTableToVector(table, &expected_rows)); + + if (rows.size() != expected_rows.size()) return EXIT_FAILURE; + + return EXIT_SUCCESS; +} diff --git a/recipes/arrow/config.yml b/recipes/arrow/config.yml new file mode 100644 index 0000000000000..40341aa3db6cd --- /dev/null +++ b/recipes/arrow/config.yml @@ -0,0 +1,3 @@ +versions: + "1.0.0": + folder: all