From 3dfcfb5829e9928f7993a58c9780f5425ffd7fbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Wed, 19 Nov 2025 14:13:10 +0000 Subject: [PATCH 1/2] Copy Torch from hf-nix We have decided to make kernel-builder standalone to make it easier to maintain Torch versions as part of the same repo. This copies over Torch from hf-nix. Other bits are still needed, but let's start somewhere. --- overlay.nix | 20 + pkgs/python-modules/torch/archs.nix | 128 +++ pkgs/python-modules/torch/binary/default.nix | 43 + .../torch/binary/generate_torch_versions.py | 311 +++++++ pkgs/python-modules/torch/binary/generic.nix | 336 ++++++++ .../torch/binary/torch-versions-hash.json | 128 +++ .../torch/binary/torch-versions.json | 99 +++ .../source/2_8/0001-patch-xpu-ops-CMake.patch | 50 ++ .../2_8/cmake-load-hip-invalid-state.diff | 40 + .../torch/source/2_8/default.nix | 761 ++++++++++++++++++ .../source/2_8/fix-cmake-cuda-toolkit.patch | 51 ++ .../torch/source/2_8/mkl-rpath.patch | 17 + .../source/2_8/pthreadpool-disable-gcd.diff | 45 ++ .../torch/source/2_8/pytorch-pr-108847.patch | 31 + .../source/2_9/0001-patch-xpu-ops-CMake.patch | 50 ++ .../torch/source/2_9/default.nix | 744 +++++++++++++++++ .../source/2_9/fix-cmake-cuda-toolkit.patch | 51 ++ .../torch/source/2_9/mkl-rpath.patch | 17 + .../source/2_9/pthreadpool-disable-gcd.diff | 45 ++ .../torch/source/2_9/pytorch-pr-108847.patch | 31 + 20 files changed, 2998 insertions(+) create mode 100644 pkgs/python-modules/torch/archs.nix create mode 100644 pkgs/python-modules/torch/binary/default.nix create mode 100755 pkgs/python-modules/torch/binary/generate_torch_versions.py create mode 100644 pkgs/python-modules/torch/binary/generic.nix create mode 100644 pkgs/python-modules/torch/binary/torch-versions-hash.json create mode 100644 pkgs/python-modules/torch/binary/torch-versions.json create mode 100644 pkgs/python-modules/torch/source/2_8/0001-patch-xpu-ops-CMake.patch create mode 100644 pkgs/python-modules/torch/source/2_8/cmake-load-hip-invalid-state.diff create mode 100644 pkgs/python-modules/torch/source/2_8/default.nix create mode 100644 pkgs/python-modules/torch/source/2_8/fix-cmake-cuda-toolkit.patch create mode 100644 pkgs/python-modules/torch/source/2_8/mkl-rpath.patch create mode 100644 pkgs/python-modules/torch/source/2_8/pthreadpool-disable-gcd.diff create mode 100644 pkgs/python-modules/torch/source/2_8/pytorch-pr-108847.patch create mode 100644 pkgs/python-modules/torch/source/2_9/0001-patch-xpu-ops-CMake.patch create mode 100644 pkgs/python-modules/torch/source/2_9/default.nix create mode 100644 pkgs/python-modules/torch/source/2_9/fix-cmake-cuda-toolkit.patch create mode 100644 pkgs/python-modules/torch/source/2_9/mkl-rpath.patch create mode 100644 pkgs/python-modules/torch/source/2_9/pthreadpool-disable-gcd.diff create mode 100644 pkgs/python-modules/torch/source/2_9/pytorch-pr-108847.patch diff --git a/overlay.nix b/overlay.nix index f9cb0467..b60a54ce 100644 --- a/overlay.nix +++ b/overlay.nix @@ -50,6 +50,26 @@ final: prev: { }); pyclibrary = python-self.callPackage ./pkgs/python-modules/pyclibrary { }; + + mkTorch = callPackage ./pkgs/python-modules/torch/binary { }; + + torch-bin_2_8 = mkTorch { + version = "2.8"; + xpuPackages = final.xpuPackages_2025_1; + }; + + torch-bin_2_9 = mkTorch { + version = "2.9"; + xpuPackages = final.xpuPackages_2025_2; + }; + + torch_2_8 = callPackage ./pkgs/python-modules/torch/source/2_8 { + xpuPackages = final.xpuPackages_2025_1; + }; + + torch_2_9 = callPackage 
./pkgs/python-modules/torch/source/2_9 { + xpuPackages = final.xpuPackages_2025_2; + }; } ) ]; diff --git a/pkgs/python-modules/torch/archs.nix b/pkgs/python-modules/torch/archs.nix new file mode 100644 index 00000000..3dd93448 --- /dev/null +++ b/pkgs/python-modules/torch/archs.nix @@ -0,0 +1,128 @@ +{ + "2.8" = { + # https://github.com/pytorch/pytorch/blob/release/2.8/.ci/manywheel/build_cuda.sh + capsPerCudaVersion = { + "12.9" = [ + "7.0" + "7.5" + "8.0" + "8.6" + "9.0" + "10.0" + "12.0" + ]; + "12.8" = [ + "7.0" + "7.5" + "8.0" + "8.6" + "9.0" + "10.0" + "12.0" + ]; + "12.6" = [ + "5.0" + "6.0" + "7.0" + "7.5" + "8.0" + "8.6" + "9.0" + ]; + # Not a supported upstream configuration, but keep it around for + # builds that fail on newer CUDA versions. + "12.4" = [ + "5.0" + "6.0" + "7.0" + "7.5" + "8.0" + "8.6" + "9.0" + ]; + }; + # https://github.com/pytorch/pytorch/blob/ba56102387ef21a3b04b357e5b183d48f0afefc7/.ci/docker/manywheel/build.sh#L82 + supportedTorchRocmArchs = [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + "gfx1200" + "gfx1201" + ]; + }; + + "2.9" = { + # https://github.com/pytorch/pytorch/blob/release/2.9/.ci/manywheel/build_cuda.sh + capsPerCudaVersion = { + "13.0" = [ + "7.5" + "8.0" + "8.6" + "9.0" + "10.0" + "12.0" + ]; + # NOTE: 12.9 does not seem to be in RC builds, check if needed for final release. + # https://download.pytorch.org/whl/test/torch/ + "12.9" = [ + "7.0" + "7.5" + "8.0" + "8.6" + "9.0" + "10.0" + "12.0" + ]; + "12.8" = [ + "7.0" + "7.5" + "8.0" + "8.6" + "9.0" + "10.0" + "12.0" + ]; + "12.6" = [ + "5.0" + "6.0" + "7.0" + "7.5" + "8.0" + "8.6" + "9.0" + ]; + # Not a supported upstream configuration, but keep it around for + # builds that fail on newer CUDA versions. + "12.4" = [ + "5.0" + "6.0" + "7.0" + "7.5" + "8.0" + "8.6" + "9.0" + ]; + }; + + supportedTorchRocmArchs = [ + # https://github.com/pytorch/pytorch/blob/21fec65781bebe867faf209f89bb687ffd236ca4/.ci/docker/manywheel/build.sh#L92 + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + "gfx1200" + "gfx1201" + ]; + }; +} diff --git a/pkgs/python-modules/torch/binary/default.nix b/pkgs/python-modules/torch/binary/default.nix new file mode 100644 index 00000000..6ef0127a --- /dev/null +++ b/pkgs/python-modules/torch/binary/default.nix @@ -0,0 +1,43 @@ +{ + config, + lib, + stdenv, + + cudaSupport ? config.cudaSupport, + rocmSupport ? config.rocmSupport, + xpuSupport ? (config.xpuSupport or false), + + callPackage, + cudaPackages, + rocmPackages, +}: + +{ + xpuPackages, + version, +}: + +let + system = stdenv.hostPlatform.system; + flattenVersion = version: lib.replaceStrings [ "." 
] [ "" ] (lib.versions.pad 2 version); + framework = + if cudaSupport then + "cu${flattenVersion cudaPackages.cudaMajorMinorVersion}" + else if rocmSupport then + "rocm${flattenVersion (lib.versions.majorMinor rocmPackages.rocm.version)}" + else if xpuSupport then + "xpu" + else + "cpu"; + torchVersions = builtins.fromJSON (builtins.readFile ./torch-versions-hash.json); + torchBySystem = torchVersions.${version} or (throw "Unsupported torch version: ${version}"); + torchByFramework = + torchBySystem.${system} or (throw "Unsupported system: ${system} for torch version: ${version}"); + urlHash = + torchByFramework.${framework} + or (throw "Unsupported framework: ${framework} for torch version: ${version} on system: ${system}"); +in +callPackage ./generic.nix { + inherit xpuPackages; + inherit (urlHash) url hash version; +} diff --git a/pkgs/python-modules/torch/binary/generate_torch_versions.py b/pkgs/python-modules/torch/binary/generate_torch_versions.py new file mode 100755 index 00000000..26da246e --- /dev/null +++ b/pkgs/python-modules/torch/binary/generate_torch_versions.py @@ -0,0 +1,311 @@ +#!/usr/bin/env nix-shell +#!nix-shell -i python3 -p python3 +""" +Script to generate torch-versions-hash.json from torch-versions.json + +This script downloads all the variants that are specified and computes +their Nix store hashes. Variants for which the hash was already computed +will not be proccessed again to avoid redownloading/hashing. +""" + +import json +import subprocess +import sys +import urllib.parse +from typing import Dict +import os + +OUTPUT_FILE = "torch-versions-hash.json" + +PYTHON_VERSION = "cp313" + + +def load_existing_hashes() -> Dict[str, str]: + """Load existing URL -> hash mappings from output file""" + if os.path.exists(OUTPUT_FILE): + try: + with open(OUTPUT_FILE, "r") as f: + data = json.load(f) + url_to_hash = {} + for version_data in data.values(): + for system_data in version_data.values(): + for framework_data in system_data.values(): + if ( + isinstance(framework_data, dict) + and "url" in framework_data + and "hash" in framework_data + ): + if framework_data["hash"]: + url_to_hash[framework_data["url"]] = framework_data[ + "hash" + ] + return url_to_hash + except (json.JSONDecodeError, IOError) as e: + # If we fail to parse the file, emit a warning and start from scratch. 
+ print( + f"Warning: Could not load existing {OUTPUT_FILE}: {e}", file=sys.stderr + ) + return {} + + +def cuda_version_to_framework(cuda_version: str) -> str: + """Convert CUDA version like '11.8' to framework identifier like 'cu118'""" + return f"cu{cuda_version.replace('.', '')}" + + +def rocm_version_to_framework(rocm_version: str) -> str: + """Convert ROCm version like '6.3' to framework identifier like 'rocm6.3'""" + return f"rocm{rocm_version}" + + +def version_to_major_minor(version: str) -> str: + """Convert version like '2.8.0' to '2.8'""" + parts = version.split(".") + if len(parts) >= 2: + return f"{parts[0]}.{parts[1]}" + return version + + +def system_to_platform(system: str, framework_type: str | None = None) -> str: + """Convert system identifier to platform string for wheel naming""" + # XPU uses a different platform format + if framework_type == "xpu": + xpu_platform_map = { + "x86_64-linux": "linux_x86_64", + } + return xpu_platform_map.get(system, system) + + platform_map = { + "x86_64-linux": "manylinux_2_28_x86_64", + "aarch64-linux": "manylinux_2_28_aarch64", + "aarch64-darwin": "macosx_11_0_arm64", + } + return platform_map.get(system, system) + + +def generate_pytorch_url( + torch_version: str, + framework_version: str, + framework_type: str, + python_version: str, + system: str, +) -> str: + """Generate PyTorch wheel download URL.""" + platform = system_to_platform(system, framework_type) + + # macOS uses CPU wheels (no CUDA/ROCm/XPU support) + if "darwin" in system: + framework_dir = "cpu" + version_part = torch_version + abi_tag = "none" # macOS wheels carry no ABI tag + wheel_name = f"torch-{version_part}-{python_version}-{abi_tag}-{platform}.whl" + elif framework_type == "cpu": + framework_dir = "cpu" + version_part = f"{torch_version}%2Bcpu" + abi_tag = python_version + wheel_name = f"torch-{version_part}-{python_version}-{abi_tag}-{platform}.whl" + elif framework_type == "xpu": + framework = "xpu" + framework_dir = framework + version_part = f"{torch_version}%2B{framework}" + abi_tag = python_version + wheel_name = f"torch-{version_part}-{python_version}-{abi_tag}-{platform}.whl" + else: + if framework_type == "cuda": + framework = cuda_version_to_framework(framework_version) + elif framework_type == "rocm": + framework = rocm_version_to_framework(framework_version) + else: + raise ValueError(f"Unsupported framework type: {framework_type}") + + framework_dir = framework + version_part = f"{torch_version}%2B{framework}" + abi_tag = python_version + wheel_name = f"torch-{version_part}-{python_version}-{abi_tag}-{platform}.whl" + + return f"https://download.pytorch.org/whl/{framework_dir}/{wheel_name}" + + +def compute_nix_hash(url: str) -> str: + """Prefetch the URL into the Nix store and return its hash in SRI form.""" + try: + print(f"Fetching hash for: {url}") + + # Some URL encodings are not valid in store paths, so unquote. + filename = url.split("/")[-1] + clean_filename = urllib.parse.unquote(filename) + + result = subprocess.run( + ["nix-prefetch-url", "--type", "sha256", "--name", clean_filename, url], + check=True, + capture_output=True, + text=True, + ) + base32_hash = result.stdout.strip() + + # Convert base32 hash to SRI format.
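+ # (fetchurl's "hash" attribute expects the SRI form, e.g. "sha256-<base64>", while nix-prefetch-url prints the older nix32/base32 form, hence the conversion below.)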
+ convert_result = subprocess.run( + [ + "nix", + "hash", + "convert", + "--hash-algo", + "sha256", + "--from", + "nix32", + base32_hash, + ], + check=True, + capture_output=True, + text=True, + ) + return convert_result.stdout.strip() + except subprocess.CalledProcessError as e: + print(f"Error computing hash for {url}: {e.stderr}", file=sys.stderr) + sys.exit(1) + except FileNotFoundError as e: + if "nix-prefetch-url" in str(e): + print( + "Error: nix-prefetch-url not found. Please ensure Nix is installed.", + file=sys.stderr, + ) + sys.exit(1) + else: + print( + "Error: nix command not found. Please ensure Nix is installed.", + file=sys.stderr, + ) + sys.exit(1) + + +def main(): + existing_hashes = load_existing_hashes() + cache_hits = 0 + cache_misses = 0 + + try: + with open("torch-versions.json", "r") as f: + torch_versions = json.load(f) + except FileNotFoundError: + print("Error: torch-versions.json not found", file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"Error parsing torch-versions.json: {e}", file=sys.stderr) + sys.exit(1) + + urls_hashes = {} + + print(f"Processing {len(torch_versions)} entries from torch-versions.json") + print(f"Found {len(existing_hashes)} existing hashes") + + for entry in torch_versions: + torch_version = entry.get("torchVersion") + cuda_version = entry.get("cudaVersion") + rocm_version = entry.get("rocmVersion") + xpu_version = entry.get("xpuVersion") + cpu = entry.get("cpu", False) + metal = entry.get("metal", False) + systems = entry.get("systems", []) + + if not torch_version: + print(f"Skipping entry without torchVersion: {entry}", file=sys.stderr) + continue + + version_key = version_to_major_minor(torch_version) + + if cuda_version: + framework_type = "cuda" + framework_version = cuda_version + print(f"Processing torch {torch_version} with CUDA {cuda_version}") + elif rocm_version: + framework_type = "rocm" + framework_version = rocm_version + print(f"Processing torch {torch_version} with ROCm {rocm_version}") + elif xpu_version: + framework_type = "xpu" + framework_version = xpu_version + print(f"Processing torch {torch_version} with XPU {xpu_version}") + elif cpu: + framework_type = "cpu" + framework_version = "cpu" + print(f"Processing torch {torch_version} (CPU build)") + elif metal: + framework_type = "cpu" + framework_version = "cpu" + print( + f"Processing torch {torch_version} (CPU-only build with Metal support)" + ) + else: + print( + f"Skipping entry without framework specification: {entry}", + file=sys.stderr, + ) + continue + + if version_key not in urls_hashes: + urls_hashes[version_key] = {} + + for system in systems: + print(f" Processing system: {system}") + + if system not in urls_hashes[version_key]: + urls_hashes[version_key][system] = {} + + if "darwin" in system: + framework = "cpu" + else: + if framework_type == "cuda": + framework = cuda_version_to_framework(framework_version) + elif framework_type == "rocm": + framework = rocm_version_to_framework(framework_version) + elif framework_type == "xpu": + framework = "xpu" + elif framework_type == "cpu": + framework = "cpu" + else: + print( + f" ⚠️ Warning: Unknown framework type {framework_type} for Linux system {system}", + file=sys.stderr, + ) + continue + + url = generate_pytorch_url( + torch_version, framework_version, framework_type, PYTHON_VERSION, system + ) + print(f" URL: {url}") + + was_cached = url in existing_hashes + if was_cached: + hash_value = existing_hashes[url] + else: + hash_value = compute_nix_hash(url) + + if was_cached: + 
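# Track how many downloads the cache avoided; the totals are reported at the end of the run. +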
cache_hits += 1 + else: + cache_misses += 1 + + urls_hashes[version_key][system][framework.replace(".", "")] = { + "url": url, + "hash": hash_value, + "version": torch_version, + } + + print(f" Hash: {hash_value}") + + try: + with open(OUTPUT_FILE, "w") as f: + json.dump(urls_hashes, f, indent=2) + print(f"Successfully generated {OUTPUT_FILE}") + except IOError as e: + print(f"Error writing {OUTPUT_FILE}: {e}", file=sys.stderr) + sys.exit(1) + + total_urls = cache_hits + cache_misses + if total_urls > 0: + print( + f"Cache statistics: {cache_hits}/{total_urls} hits ({cache_hits/total_urls*100:.1f}% hit rate)" + ) + + +if __name__ == "__main__": + main() diff --git a/pkgs/python-modules/torch/binary/generic.nix b/pkgs/python-modules/torch/binary/generic.nix new file mode 100644 index 00000000..8acd481c --- /dev/null +++ b/pkgs/python-modules/torch/binary/generic.nix @@ -0,0 +1,336 @@ +{ + config, + lib, + stdenv, + symlinkJoin, + buildPythonPackage, + fetchurl, + + cudaSupport ? config.cudaSupport, + rocmSupport ? config.rocmSupport, + tritonSupport ? (!stdenv.hostPlatform.isDarwin), + xpuSupport ? (config.xpuSupport or false), + + # Native build inputs + autoAddDriverRunpath, + autoPatchelfHook, + python, + pythonRelaxWheelDepsHook, + pythonWheelDepsCheckHook, + + # Build inputs + cudaPackages, + rocmPackages, + xpuPackages, + + # Python dependencies + filelock, + fsspec, + jinja2, + networkx, + numpy, + pyyaml, + requests, + setuptools, + sympy, + triton, + triton-cuda, + typing-extensions, + + url, + hash, + version, + # TODO: remove; only kept for compatibility. + cxx11Abi ? true, + + effectiveStdenv ? if cudaSupport then cudaPackages.backendStdenv else stdenv, +}: +let + effectiveTriton = + if cudaSupport then + triton-cuda + else if xpuSupport then + python.pkgs.triton-xpu_2_8 + else + triton; + + archs = (import ../archs.nix).${lib.versions.majorMinor version}; + + supportedTorchCudaCapabilities = + let + inherit (archs) capsPerCudaVersion; + real = capsPerCudaVersion."${lib.versions.majorMinor cudaPackages.cudaMajorMinorVersion}"; + ptx = lib.map (x: "${x}+PTX") real; + in + real ++ ptx; + supportedCudaCapabilities = lib.intersectLists cudaPackages.flags.cudaCapabilities supportedTorchCudaCapabilities; + inherit (archs) supportedTorchRocmArchs; + + aotritonVersions = with rocmPackages; { + "2.8" = aotriton_0_10; + "2.9" = aotriton_0_11; + }; + + aotriton = + let + torchMajorMinor = lib.versions.majorMinor version; + in + aotritonVersions.${torchMajorMinor} + or (throw "aotriton version is not specified for Torch ${torchMajorMinor}"); + + rocmtoolkit_joined = symlinkJoin { + name = "rocm-merged"; + + paths = with rocmPackages; [ + aotriton + clr + comgr + hipblas + hipblas-common-devel + hipblaslt + hipfft + hipify-clang + hiprand + hipsolver + hipsparse + hipsparselt + hsa-rocr + miopen-hip + rccl + rocblas + rocm-core + rocm-device-libs + rocm-hip-runtime + rocm-smi-lib + rocminfo + rocrand + rocsolver + rocsparse + roctracer + ]; + + postBuild = '' + # Fix `setuptools` not being found + rm -rf $out/nix-support + + # Variables that we want to pass through to downstream derivations.
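+ # (stdenv sources nix-support/setup-hook from every build input, which is how these exports reach dependent derivations.)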
+ mkdir -p $out/nix-support + echo 'export ROCM_PATH="${placeholder "out"}"' >> $out/nix-support/setup-hook + echo 'export ROCM_SOURCE_DIR="${placeholder "out"}"' >> $out/nix-support/setup-hook + echo 'export CMAKE_CXX_FLAGS="-I${placeholder "out"}/include -I${placeholder "out"}/include/rocblas"' >> $out/nix-support/setup-hook + ''; + }; + +in +buildPythonPackage { + pname = "torch"; + inherit version; + format = "wheel"; + + stdenv = effectiveStdenv; + + outputs = [ + "out" # output standard python package + "cxxdev" # propagated deps for the cmake consumers of torch + ]; + cudaPropagateToOutput = "cxxdev"; + rocmPropagateToOutput = "cxxdev"; + + src = fetchurl { + inherit url hash; + }; + + nativeBuildInputs = [ + pythonRelaxWheelDepsHook + pythonWheelDepsCheckHook + ] + ++ lib.optionals stdenv.hostPlatform.isLinux [ + autoPatchelfHook + ] + ++ lib.optionals cudaSupport [ + autoAddDriverRunpath + cudaPackages.setupCudaHook + ] + ++ lib.optionals rocmSupport [ + rocmPackages.setupRocmHook + ]; + + buildInputs = + lib.optionals cudaSupport ( + with cudaPackages; + [ + # Use lib output to avoid libcuda.so.1 stub getting used. + cuda_cudart + cuda_cupti + cuda_nvrtc + cudnn + libcublas + libcufft + libcufile + libcurand + libcusolver + libcusparse + libcusparse_lt + nccl + ] + ) + ++ lib.optionals (cudaSupport && lib.versionAtLeast version "2.9") [ + cudaPackages.libnvshmem + ] + ++ lib.optionals rocmSupport ([ + rocmtoolkit_joined + ]) + ++ lib.optionals xpuSupport ( + with xpuPackages; + [ + intel-oneapi-ccl + intel-oneapi-compiler-dpcpp-cpp-runtime + intel-oneapi-compiler-shared-runtime + intel-oneapi-mkl-core + intel-oneapi-mkl-sycl-blas + intel-oneapi-mkl-sycl-dft + intel-oneapi-mkl-sycl-lapack + intel-oneapi-mpi + intel-pti + ] + ); + + dependencies = [ + filelock + fsspec + jinja2 + networkx + numpy + pyyaml + requests + setuptools + sympy + typing-extensions + ] + ++ lib.optionals tritonSupport [ + effectiveTriton + ]; + + pythonRelaxWheelDeps = [ + "sympy" + "triton" + ]; + + # These are framework dependencies that are normally installed as Python + # dependencies. We neither need nor provide them, because we burn + # the Nix store paths of the frameworks into the Torch libraries. + pythonRemoveWheelDeps = + lib.optionals cudaSupport [ + "nvidia-cuda-runtime" + "nvidia-cuda-nvrtc" + "nvidia-cuda-cupti" + "nvidia-cudnn" + "nvidia-cublas" + "nvidia-cufft" + "nvidia-curand" + "nvidia-cusolver" + "nvidia-cusparse" + "nvidia-cusparselt" + "nvidia-nccl" + "nvidia-nvshmem" + "nvidia-nvtx" + "nvidia-nvjitlink" + "nvidia-cufile" + ] + ++ lib.optionals rocmSupport [ + "pytorch-triton-rocm" + ] + ++ lib.optionals xpuSupport [ + "intel-cmplr-lib-rt" + "intel-cmplr-lib-ur" + "intel-cmplr-lic-rt" + "intel-sycl-rt" + "oneccl-devel" + "oneccl" + "impi-rt" + "onemkl-sycl-blas" + "onemkl-sycl-dft" + "onemkl-sycl-lapack" + "onemkl-sycl-rng" + "onemkl-sycl-sparse" + "dpcpp-cpp-rt" + "intel-opencl-rt" + "mkl" + "intel-openmp" + "tbb" + "tcmlib" + "umf" + "intel-pti" + "pytorch-triton-xpu" + ]; + + propagatedCxxBuildInputs = lib.optionals rocmSupport [ rocmtoolkit_joined ]; + + postInstall = + lib.optionalString cudaSupport '' + # Remove to use FindCUDAToolkit from CMake. + rm -f $out/${python.sitePackages}/torch/share/cmake/Caffe2/FindCUDAToolkit.cmake + '' + + lib.optionalString rocmSupport '' + # Remove all ROCm libraries; we want to link against Nix packages. + # This keeps the outputs lean and requires downstream to specify + # dependencies.
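+ # (The globs below match the ROCm runtime and support libraries that the upstream wheel bundles.)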
+ rm -rf $out/${python.sitePackages}/torch/lib/{libamd*,libaotriton*,libdrm*,libelf*,libgomp*,libhip*,libhsa*,libMIOpen*,libnuma*,librccl*,libroc*,libtinfo*}.so* + rm -rf $out/${python.sitePackages}/torch/lib/{rocblas,hipblaslt,hipsparselt} + ''; + + autoPatchelfIgnoreMissingDeps = lib.optionals stdenv.hostPlatform.isLinux [ + "libcuda.so.1" + ]; + + # We want to have glibc in RPATH as well, because kernel-builder build + # environments use an older glibc. + autoPatchelfFlags = [ "--keep-libc" ]; + + # See https://github.com/NixOS/nixpkgs/issues/296179 + # + # This is a quick hack to add `libnvrtc` to the runpath so that torch can find + # it when it is needed at runtime. + extraRunpaths = lib.optionals cudaSupport [ "${lib.getLib cudaPackages.cuda_nvrtc}/lib" ]; + postPhases = lib.optionals stdenv.hostPlatform.isLinux [ "postPatchelfPhase" ]; + postPatchelfPhase = '' + while IFS= read -r -d $'\0' elf ; do + for extra in $extraRunpaths ; do + echo patchelf "$elf" --add-rpath "$extra" >&2 + patchelf "$elf" --add-rpath "$extra" + done + done < <( + find "''${!outputLib}" "$out" -type f -iname '*.so' -print0 + ) + ''; + + postFixup = '' + mkdir -p "$cxxdev/nix-support" + printWords "''${propagatedCxxBuildInputs[@]}" >> "$cxxdev/nix-support/propagated-build-inputs" + ''; + + dontStrip = true; + + pythonImportsCheck = [ "torch" ]; + + passthru = { + inherit + cudaSupport + cudaPackages + cxx11Abi + rocmSupport + rocmPackages + xpuSupport + xpuPackages + ; + + cudaCapabilities = if cudaSupport then supportedCudaCapabilities else [ ]; + rocmArchs = if rocmSupport then supportedTorchRocmArchs else [ ]; + }; + + meta = with lib; { + description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration"; + homepage = "https://pytorch.org/"; + license = lib.licenses.bsd3; + }; +} diff --git a/pkgs/python-modules/torch/binary/torch-versions-hash.json b/pkgs/python-modules/torch/binary/torch-versions-hash.json new file mode 100644 index 00000000..d1124f96 --- /dev/null +++ b/pkgs/python-modules/torch/binary/torch-versions-hash.json @@ -0,0 +1,128 @@ +{ + "2.8": { + "x86_64-linux": { + "cu126": { + "url": "https://download.pytorch.org/whl/cu126/torch-2.8.0%2Bcu126-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-0IFEAR5BC50VkU5yVuGxcIqQSEyywDcSGZ4CkYVtQXc=", + "version": "2.8.0" + }, + "cu128": { + "url": "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-OoUjaaON7DQ9RezQvDZg95uIoj4Mh40YcH98E79JU48=", + "version": "2.8.0" + }, + "cu129": { + "url": "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-VjdAFnviGJtxUwtQPwyKjXqCZ91J1N5vnF8dI/viN98=", + "version": "2.8.0" + }, + "rocm63": { + "url": "https://download.pytorch.org/whl/rocm6.3/torch-2.8.0%2Brocm6.3-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-LNqsVUXWbxEDCRSuA8hdFh8ksUBTtn9JUAqPUmOO8QQ=", + "version": "2.8.0" + }, + "rocm64": { + "url": "https://download.pytorch.org/whl/rocm6.4/torch-2.8.0%2Brocm6.4-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-Ji6+rGkF34AZafttvcCFx1whxWCdyDVzNOwLCiQuUOs=", + "version": "2.8.0" + }, + "cpu": { + "url": "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-j4He20xgduwyWsw7R1JfnFUOUoShjq4dkGHFQ/e2594=", + "version": "2.8.0" + }, + "xpu": { + "url": "https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp313-cp313-linux_x86_64.whl", + "hash": 
"sha256-ZZkiWk6DcU2MM7a/3q+9hTiQJ9KScGNbtzhdYiFL7p4=", + "version": "2.8.0" + } + }, + "aarch64-linux": { + "cu129": { + "url": "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp313-cp313-manylinux_2_28_aarch64.whl", + "hash": "sha256-rS1kMWY156sG9slzolJSbVmpKiBFglwQL4dpFKcjBNA=", + "version": "2.8.0" + }, + "cpu": { + "url": "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", + "hash": "sha256-pQZLXiN3LI0WQGjMfBLgGnX697lI7NlaDUAH10h+XyU=", + "version": "2.8.0" + } + }, + "aarch64-darwin": { + "cpu": { + "url": "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", + "hash": "sha256-BX79MKZ3jS7l4jdM1jpj9jMRqm8zMh5ifGVd9gq905A=", + "version": "2.8.0" + } + } + }, + "2.9": { + "x86_64-linux": { + "cu126": { + "url": "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-DEW2Nu1sAszuYk7FPYRMMrHbm+sj1hnzfEEVJGeQTNU=", + "version": "2.9.0" + }, + "cu128": { + "url": "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-l97wCH+O8XG5AC6lALr/3UQMe91VnCPDi7+HgbZ+k2Q=", + "version": "2.9.0" + }, + "cu130": { + "url": "https://download.pytorch.org/whl/cu130/torch-2.9.0%2Bcu130-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-kqktts3jjQUiDB996Rzqz/AgVGOGxbegomjcquF7XBg=", + "version": "2.9.0" + }, + "rocm63": { + "url": "https://download.pytorch.org/whl/rocm6.3/torch-2.9.0%2Brocm6.3-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-gw3ZjX2JlS7MV8aO6nCyTMAsDO1CSa7z9UzHsW/Wbic=", + "version": "2.9.0" + }, + "rocm64": { + "url": "https://download.pytorch.org/whl/rocm6.4/torch-2.9.0%2Brocm6.4-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-yKiHbnxb2P3MOUKJB20rgyla3a1PXKYOo4ZRI9Xcn3Y=", + "version": "2.9.0" + }, + "cpu": { + "url": "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", + "hash": "sha256-bJshdYRACWPVtNrds3Eex6N3jqshHhhlT7oHbM47hoI=", + "version": "2.9.0" + }, + "xpu": { + "url": "https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp313-cp313-linux_x86_64.whl", + "hash": "sha256-mNagbdf7GFh0NnsYvWCfBfFv3OQUKlmAypRGGUmWXNI=", + "version": "2.9.0" + } + }, + "aarch64-linux": { + "cu126": { + "url": "https://download.pytorch.org/whl/cu126/torch-2.9.0%2Bcu126-cp313-cp313-manylinux_2_28_aarch64.whl", + "hash": "sha256-qn+dy/BTDsEKIRRxyK+1Mo1bvKtvFP0beLXm8vFO1hc=", + "version": "2.9.0" + }, + "cu128": { + "url": "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", + "hash": "sha256-TXb3E0WvR/Aix/pV7dDBgQ0Br4ncue3P36/j0qD3prg=", + "version": "2.9.0" + }, + "cu130": { + "url": "https://download.pytorch.org/whl/cu130/torch-2.9.0%2Bcu130-cp313-cp313-manylinux_2_28_aarch64.whl", + "hash": "sha256-7PPSS9TA5uQlvXeKbembUieeACGmDX6xGrDC1mnz+bA=", + "version": "2.9.0" + }, + "cpu": { + "url": "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", + "hash": "sha256-vkQ42NrX8NWl5U8P7viok0RolOyH8QK7HYLcxFGFQuQ=", + "version": "2.9.0" + } + }, + "aarch64-darwin": { + "cpu": { + "url": "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", + "hash": "sha256-4kg22Wi1TvTfsFWUABphlYcRrJIkAmKR5OP5L4Om/X8=", + "version": "2.9.0" + } + } + } +} \ No newline at end of file diff --git a/pkgs/python-modules/torch/binary/torch-versions.json b/pkgs/python-modules/torch/binary/torch-versions.json new file mode 100644 index 
00000000..70167a72 --- /dev/null +++ b/pkgs/python-modules/torch/binary/torch-versions.json @@ -0,0 +1,99 @@ +[ + { + "torchVersion": "2.8.0", + "cudaVersion": "12.6", + "cxx11Abi": true, + "systems": ["x86_64-linux"] + }, + { + "torchVersion": "2.8.0", + "cudaVersion": "12.8", + "cxx11Abi": true, + "systems": ["x86_64-linux"] + }, + { + "torchVersion": "2.8.0", + "cudaVersion": "12.9", + "cxx11Abi": true, + "systems": ["x86_64-linux", "aarch64-linux"] + }, + { + "torchVersion": "2.8.0", + "rocmVersion": "6.3", + "cxx11Abi": true, + "systems": ["x86_64-linux"] + }, + { + "torchVersion": "2.8.0", + "rocmVersion": "6.4", + "cxx11Abi": true, + "systems": ["x86_64-linux"] + }, + { + "torchVersion": "2.8.0", + "cxx11Abi": true, + "metal": true, + "systems": ["aarch64-darwin"] + }, + { + "torchVersion": "2.8.0", + "cxx11Abi": true, + "cpu": true, + "systems": ["aarch64-linux", "x86_64-linux"] + }, + { + "torchVersion": "2.8.0", + "xpuVersion": "2025.1.3", + "cxx11Abi": true, + "systems": ["x86_64-linux"] + }, + + { + "torchVersion": "2.9.0", + "cudaVersion": "12.6", + "cxx11Abi": true, + "systems": ["x86_64-linux", "aarch64-linux"] + }, + { + "torchVersion": "2.9.0", + "cudaVersion": "12.8", + "cxx11Abi": true, + "systems": ["x86_64-linux", "aarch64-linux"] + }, + { + "torchVersion": "2.9.0", + "cudaVersion": "13.0", + "cxx11Abi": true, + "systems": ["x86_64-linux", "aarch64-linux"] + }, + { + "torchVersion": "2.9.0", + "rocmVersion": "6.3", + "cxx11Abi": true, + "systems": ["x86_64-linux"] + }, + { + "torchVersion": "2.9.0", + "rocmVersion": "6.4", + "cxx11Abi": true, + "systems": ["x86_64-linux"] + }, + { + "torchVersion": "2.9.0", + "cxx11Abi": true, + "metal": true, + "systems": ["aarch64-darwin"] + }, + { + "torchVersion": "2.9.0", + "cxx11Abi": true, + "cpu": true, + "systems": ["aarch64-linux", "x86_64-linux"] + }, + { + "torchVersion": "2.9.0", + "xpuVersion": "2025.2.1", + "cxx11Abi": true, + "systems": ["x86_64-linux"] + } +] diff --git a/pkgs/python-modules/torch/source/2_8/0001-patch-xpu-ops-CMake.patch b/pkgs/python-modules/torch/source/2_8/0001-patch-xpu-ops-CMake.patch new file mode 100644 index 00000000..479446b0 --- /dev/null +++ b/pkgs/python-modules/torch/source/2_8/0001-patch-xpu-ops-CMake.patch @@ -0,0 +1,50 @@ +From 3b5dd2c142173a9a6afe6b21a3107e04d7fcc0ea Mon Sep 17 00:00:00 2001 +From: "Wang, Yi A" +Date: Mon, 25 Aug 2025 17:39:38 -0700 +Subject: [PATCH] patch xpu ops CMake + +Signed-off-by: Wang, Yi A +--- + cmake/BuildFlags.cmake | 1 + + cmake/Modules/FindSYCL.cmake | 6 +++++- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/cmake/BuildFlags.cmake b/cmake/BuildFlags.cmake +index ec9aaccb..8f4740df 100644 +--- a/cmake/BuildFlags.cmake ++++ b/cmake/BuildFlags.cmake +@@ -83,6 +83,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func) + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value) + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz) ++ set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D_GLIBCXX_USE_CXX11_ABI=1) + endif() + + if(CMAKE_BUILD_TYPE MATCHES Debug) +diff --git a/cmake/Modules/FindSYCL.cmake b/cmake/Modules/FindSYCL.cmake +index 86457ba3..0feec0fa 100644 +--- a/cmake/Modules/FindSYCL.cmake ++++ b/cmake/Modules/FindSYCL.cmake +@@ -34,7 +34,8 @@ + # SYCL_ADD_EXECUTABLE + # + # SYCL_ADD_LIBRARY +- ++set(SYCL_EXTRA_INCLUDE_DIRS $ENV{SYCL_EXTRA_INCLUDE_DIRS}) ++string(REPLACE " " ";" SYCL_EXTRA_INCLUDE_DIRS 
"${SYCL_EXTRA_INCLUDE_DIRS}") + macro(SYCL_FIND_HELPER_FILE _name _extension) + set(_full_name "${_name}.${_extension}") + # CMAKE_CURRENT_LIST_FILE contains the full path to the file currently being +@@ -216,6 +217,9 @@ macro(SYCL_WRAP_SRCS sycl_target generated_files) + set(generated_extension ${CMAKE_${SYCL_C_OR_CXX}_OUTPUT_EXTENSION}) + + set(SYCL_include_dirs "${SYCL_INCLUDE_DIR}") ++ if(DEFINED SYCL_EXTRA_INCLUDE_DIRS) ++ list(APPEND SYCL_include_dirs ${SYCL_EXTRA_INCLUDE_DIRS}) ++ endif() + list(APPEND SYCL_include_dirs "$") + + set(SYCL_compile_definitions "$") +-- +2.43.0 + diff --git a/pkgs/python-modules/torch/source/2_8/cmake-load-hip-invalid-state.diff b/pkgs/python-modules/torch/source/2_8/cmake-load-hip-invalid-state.diff new file mode 100644 index 00000000..fa1a3e10 --- /dev/null +++ b/pkgs/python-modules/torch/source/2_8/cmake-load-hip-invalid-state.diff @@ -0,0 +1,40 @@ +diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake +index cae0ca62f23..ed2bd0862f6 100644 +--- a/cmake/public/LoadHIP.cmake ++++ b/cmake/public/LoadHIP.cmake +@@ -93,24 +93,25 @@ if(HIP_FOUND) + # hip (lower-case) package. Both are probed above and will be in + # ROCM_INCLUDE_DIRS if available. + find_file(ROCM_VERSION_HEADER_PATH +- NAMES rocm-core/rocm_version.h ++ NAMES ++ rocm-core/rocm_version.h # Higher priority ++ hip/hip_version.h # Lower priority + NO_DEFAULT_PATH + PATHS ${ROCM_INCLUDE_DIRS} + ) +- set(ROCM_LIB_NAME "ROCM") +- if(NOT ROCM_VERSION_HEADER_PATH) +- find_file(ROCM_VERSION_HEADER_PATH +- NAMES hip/hip_version.h +- NO_DEFAULT_PATH +- PATHS ${ROCM_INCLUDE_DIRS} +- ) +- set(ROCM_LIB_NAME "HIP") +- endif() ++ + if(NOT ROCM_VERSION_HEADER_PATH) + message(FATAL_ERROR "Could not find hip/hip_version.h or rocm-core/rocm_version.h in ${ROCM_INCLUDE_DIRS}") + endif() ++ + get_filename_component(ROCM_HEADER_NAME ${ROCM_VERSION_HEADER_PATH} NAME) + ++ if(ROCM_HEADER_NAME STREQUAL "rocm_version.h") ++ set(ROCM_LIB_NAME "ROCM") ++ else() ++ set(ROCM_LIB_NAME "HIP") ++ endif() ++ + if(EXISTS ${ROCM_VERSION_HEADER_PATH}) + set(ROCM_HEADER_FILE ${ROCM_VERSION_HEADER_PATH}) + else() diff --git a/pkgs/python-modules/torch/source/2_8/default.nix b/pkgs/python-modules/torch/source/2_8/default.nix new file mode 100644 index 00000000..fec710fd --- /dev/null +++ b/pkgs/python-modules/torch/source/2_8/default.nix @@ -0,0 +1,761 @@ +{ + stdenv, + stdenvAdapters, + lib, + fetchFromGitHub, + buildPythonPackage, + python, + config, + cudaSupport ? config.cudaSupport, + cudaPackages, + autoAddDriverRunpath, + effectiveMagma ? + if cudaSupport then + magma-cuda-static + else if rocmSupport then + magma-hip + else + magma, + effectiveStdenv ? if cudaSupport then cudaPackages.backendStdenv else stdenv, + magma, + magma-hip, + magma-cuda-static, + # Use the system NCCL as long as we're targeting CUDA on a supported platform. + useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported || rocmSupport), + MPISupport ? false, + mpi, + nvtx, + buildDocs ? false, + cxx11Abi ? true, + + # tests.cudaAvailable: + callPackage, + + # Native build inputs + cmake, + symlinkJoin, + which, + pybind11, + removeReferencesTo, + + # Build inputs + apple-sdk_15, + libdrm, + numactl, + + # dependencies + astunparse, + binutils, + expecttest, + filelock, + fsspec, + hypothesis, + jinja2, + networkx, + packaging, + psutil, + pyyaml, + requests, + setuptools, + sympy, + types-dataclasses, + typing-extensions, + # ROCm build and `torch.compile` requires `triton` + tritonSupport ? 
(!stdenv.hostPlatform.isDarwin), + triton, + + # TODO: 1. callPackage needs to learn to distinguish between the task + # of "asking for an attribute from the parent scope" and + # the task of "exposing a formal parameter in .override". + # TODO: 2. We should probably abandon attributes such as `torchWithCuda` (etc.) + # as they routinely end up consuming the wrong arguments + # (dependencies without cuda support). + # Instead we should rely on overlays and nixpkgsFun. + # (@SomeoneSerge) + _tritonEffective ? + if cudaSupport then + triton-cuda + else if xpuSupport then + python.pkgs.triton-xpu_2_8 + else + triton, + triton-cuda, + + # Disable MKLDNN on aarch64-darwin, since it negatively impacts performance; + # this is also what the official pytorch build does + mklDnnSupport ? !(stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64), + + # virtual pkg that consistently instantiates blas across nixpkgs + # See https://github.com/NixOS/nixpkgs/pull/83888 + blas, + + # ninja (https://ninja-build.org) must be available to run C++ extensions tests + ninja, + + # dependencies for torch.utils.tensorboard + pillow, + six, + tensorboard, + protobuf, + + # ROCm dependencies + rocmSupport ? config.rocmSupport, + rocmPackages, + xpuSupport ? (config.xpuSupport or false), + xpuPackages, + gpuTargets ? [ ], +}: + +let + inherit (lib) + attrsets + lists + strings + trivial + ; + inherit (cudaPackages) cudnn nccl; + cudaFlags = cudaPackages.flags; + + triton = throw "python3Packages.torch: use _tritonEffective instead of triton to avoid divergence"; + + setBool = v: if v then "1" else "0"; + + archs = (import ../../archs.nix)."2.8"; + + supportedTorchCudaCapabilities = + let + inherit (archs) capsPerCudaVersion; + real = capsPerCudaVersion."${lib.versions.majorMinor cudaPackages.cudaMajorMinorVersion}"; + ptx = lists.map (x: "${x}+PTX") real; + in + real ++ ptx; + + inherit (archs) supportedTorchRocmArchs; + + # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements + # of the first list *from* the second list. That means: + # lists.subtractLists a b = b - a + + # For CUDA + supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities; + unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities; + + # Throw an error if, after filtering, no supported GPU targets remain; the message lists the + # requested targets that are unsupported. + gpuArchWarner = + supported: unsupported: + trivial.throwIf (supported == [ ]) ( + "No supported GPU targets specified. Requested GPU targets: " + + strings.concatStringsSep ", " unsupported + ) supported; + + # Create the gpuTargetString. + gpuTargetString = strings.concatStringsSep ";" ( + if gpuTargets != [ ] then + # If gpuTargets is specified, it always takes priority. + gpuTargets + else if cudaSupport then + gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities + else if rocmSupport then + supportedTorchRocmArchs + else + throw "No GPU targets specified" + ); + + # Use the rocm/pytorch fork for ROCm 7, since upstream PyTorch 2.8 does not support it.
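+ # (Assumption: the pinned rocm/pytorch revision used below carries the ROCm 7 enablement patches.)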
+ rocmTorch = rocmSupport && (lib.versions.majorMinor rocmPackages.rocm.version == "7.0"); + rocmtoolkit_joined = symlinkJoin { + name = "rocm-merged"; + + paths = with rocmPackages; [ + aotriton_0_10 + clr + comgr + hipblas + hipblas-common-devel + hipblaslt + hipfft + hipify-clang + hiprand + hipsolver + hipsparse + hipsparselt + hsa-rocr + miopen-hip + rccl + rocblas + rocm-core + rocm-device-libs + rocm-hip-runtime + rocm-smi-lib + rocminfo + rocrand + rocsolver + rocsparse + roctracer + ]; + + postBuild = '' + # Fix `setuptools` not being found + rm -rf $out/nix-support + + # Variables that we want to pass through to downstream derivations. + mkdir -p $out/nix-support + echo 'export ROCM_PATH="${placeholder "out"}"' >> $out/nix-support/setup-hook + echo 'export ROCM_SOURCE_DIR="${placeholder "out"}"' >> $out/nix-support/setup-hook + echo 'export CMAKE_CXX_FLAGS="-I${placeholder "out"}/include -I${placeholder "out"}/include/rocblas"' >> $out/nix-support/setup-hook + ''; + }; + + brokenConditions = attrsets.filterAttrs (_: cond: cond) { + "CUDA and ROCm are mutually exclusive" = cudaSupport && rocmSupport; + "CUDA is not targeting Linux" = cudaSupport && !stdenv.hostPlatform.isLinux; + "Unsupported CUDA version" = + cudaSupport + && !(builtins.elem cudaPackages.cudaMajorVersion [ + "11" + "12" + ]); + "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = + MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit); + # This used to be a deep package set comparison between cudaPackages and + # effectiveMagma.cudaPackages, making torch too strict in cudaPackages. + # In particular, this triggered warnings from cuda's `aliases.nix` + "Magma cudaPackages does not match cudaPackages" = + cudaSupport + && (effectiveMagma.cudaPackages.cudaMajorMinorVersion != cudaPackages.cudaMajorMinorVersion); + #"Rocm support is currently broken because `rocmPackages.hipblaslt` is unpackaged. (2024-06-09)" = + # rocmSupport; + }; + torchXpuOpsSrc = + if xpuSupport then + fetchFromGitHub { + owner = "intel"; + repo = "torch-xpu-ops"; + rev = "3a9419c8bb6a98dd3e3cd473c36691fb4abeae40"; + hash = "sha256-cNNnqJXfFO7UOJtXmDGKS2s1Jjs0+/AztPMjE3K/YG0="; + } + else + null; +in +buildPythonPackage rec { + pname = "torch"; + version = "2.8.0"; + pyproject = true; + + stdenv = effectiveStdenv; + + outputs = [ + "out" # output standard python package + "dev" # output libtorch headers + "lib" # output libtorch libraries + "cxxdev" # propagated deps for the cmake consumers of torch + ]; + cudaPropagateToOutput = "cxxdev"; + rocmPropagateToOutput = "cxxdev"; + + src = + if rocmTorch then + fetchFromGitHub { + owner = "ROCm"; + repo = "pytorch"; + rev = "245bf6edbc7e4b6aabbb4a218b518b853225956c"; + fetchSubmodules = true; + hash = "sha256-vQL9rjrQjQDnMwNB0NNKuecp8PENOib9Y+K5J9ZaiFM="; + } + else + fetchFromGitHub { + owner = "pytorch"; + repo = "pytorch"; + tag = "v${version}"; + fetchSubmodules = true; + hash = "sha256-5JDYFoBe6bC9Dz143Bm/5OEOWsQxjctAR9fI4f6G2W8="; + }; + + patches = [ + ./mkl-rpath.patch + ] + ++ lib.optionals cudaSupport [ ./fix-cmake-cuda-toolkit.patch ] + ++ lib.optionals (!rocmTorch) [ ./cmake-load-hip-invalid-state.diff ] + ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isx86_64) [ + # pthreadpool added support for Grand Central Dispatch in April + # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO) + # that is available starting with macOS 10.13. However, our current + # base is 10.12. 
Until we upgrade, we can fall back on the older + # pthread support. + ./pthreadpool-disable-gcd.diff + ] + ++ lib.optionals stdenv.hostPlatform.isLinux [ + # Propagate CUPTI to Kineto by overriding the search path with environment variables. + # https://github.com/pytorch/pytorch/pull/108847 + ./pytorch-pr-108847.patch + ]; + + postUnpack = lib.optionalString xpuSupport '' + cp -r --no-preserve=mode ${torchXpuOpsSrc} $sourceRoot/third_party/torch-xpu-ops + patch -d $sourceRoot/third_party/torch-xpu-ops -p1 < ${./0001-patch-xpu-ops-CMake.patch} + ''; + + postPatch = + let + pyiGenPath = "${typing-extensions}/${python.sitePackages}:${pyyaml}/${python.sitePackages}"; + in + '' + substituteInPlace pyproject.toml \ + --replace-fail "setuptools>=62.3.0,<80.0" \ + "setuptools>=62.3.0" + + substituteInPlace cmake/public/cuda.cmake \ + --replace-fail \ + 'message(FATAL_ERROR "Found two conflicting CUDA' \ + 'message(WARNING "Found two conflicting CUDA' \ + --replace-warn \ + "set(CUDAToolkit_ROOT" \ + "# Upstream: set(CUDAToolkit_ROOT" + substituteInPlace third_party/gloo/cmake/Cuda.cmake \ + --replace-warn "find_package(CUDAToolkit 7.0" "find_package(CUDAToolkit" + + # annotations (3.7), print_function (3.0), with_statement (2.6) are all supported + sed -i -e "/from __future__ import/d" **.py + substituteInPlace third_party/NNPACK/CMakeLists.txt \ + --replace-fail "PYTHONPATH=" 'PYTHONPATH=$ENV{PYTHONPATH}:' + # flag from cmakeFlags doesn't work, not clear why + # setting it at the top of NNPACK's own CMakeLists does + sed -i '2s;^;set(PYTHON_SIX_SOURCE_DIR ${six.src})\n;' third_party/NNPACK/CMakeLists.txt + + # Ensure that torch profiler unwind uses addr2line from nix + substituteInPlace torch/csrc/profiler/unwind/unwind.cpp \ + --replace-fail 'addr2line_binary_ = "addr2line"' 'addr2line_binary_ = "${lib.getExe' binutils "addr2line"}"' + + # gen_pyi needs typing-extensions. + substituteInPlace torch/CMakeLists.txt \ + --replace-fail "env PYTHONPATH=\"\''${TORCH_ROOT}\"" \ + "env PYTHONPATH=\"\''${TORCH_ROOT}:${pyiGenPath}\"" + '' + + lib.optionalString rocmSupport '' + # https://github.com/facebookincubator/gloo/pull/297 + substituteInPlace third_party/gloo/cmake/Hipify.cmake \ + --replace-fail "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}" + + # Replace hard-coded rocm paths + substituteInPlace caffe2/CMakeLists.txt \ + --replace-fail "/opt/rocm" "${rocmtoolkit_joined}" + + # Strangely, this is never set in cmake + substituteInPlace cmake/public/LoadHIP.cmake \ + --replace-fail "set(ROCM_PATH \$ENV{ROCM_PATH})" \ + "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitVersion rocmPackages.clr.version))})" + '' + # Detection of NCCL version doesn't work particularly well when using the static binary. + + lib.optionalString cudaSupport '' + substituteInPlace cmake/Modules/FindNCCL.cmake \ + --replace-fail \ + 'message(FATAL_ERROR "Found NCCL header version and library version' \ + 'message(WARNING "Found NCCL header version and library version' + '' + # Remove PyTorch's FindCUDAToolkit.cmake and use CMake's default. + # NOTE: Parts of pytorch rely on unmaintained FindCUDA.cmake with custom patches to support e.g. + # newer architectures (sm_90a). 
We do want to delete vendored patches, but have to keep them + # until https://github.com/pytorch/pytorch/issues/76082 is addressed + + lib.optionalString cudaSupport '' + rm cmake/Modules/FindCUDAToolkit.cmake + '' + + lib.optionalString xpuSupport '' + # Replace the oneAPI install dir + substituteInPlace cmake/Modules/FindMKL.cmake \ + --replace-fail 'SET(DEFAULT_INTEL_ONEAPI_DIR "/opt/intel/oneapi")' 'SET(DEFAULT_INTEL_ONEAPI_DIR ${xpuPackages.oneapi-torch-dev}/oneapi)' + # Replace the mkldnn build for XPU + sed -i '/ExternalProject_Add(xpu_mkldnn_proj/,/^ *)/s/^/#/' cmake/Modules/FindMKLDNN.cmake + substituteInPlace cmake/Modules/FindMKLDNN.cmake \ + --replace-fail 'ExternalProject_Get_Property(xpu_mkldnn_proj SOURCE_DIR BINARY_DIR)' '# ExternalProject_Get_Property(xpu_mkldnn_proj SOURCE_DIR BINARY_DIR)' \ + --replace-fail "set(XPU_MKLDNN_LIBRARIES \''${BINARY_DIR}/src/\''${DNNL_LIB_NAME})" "set(XPU_MKLDNN_LIBRARIES ${xpuPackages.onednn-xpu}/lib/libdnnl.a)" \ + --replace-fail "set(XPU_MKLDNN_INCLUDE \''${SOURCE_DIR}/include \''${BINARY_DIR}/include)" "set(XPU_MKLDNN_INCLUDE ${xpuPackages.onednn-xpu}/include)" + # Comment out the torch-xpu-ops git clone block in pytorch/caffe2/CMakeLists.txt + sed -i '/set(TORCH_XPU_OPS_REPO_URL/,/^ endif()/s/^/#/' caffe2/CMakeLists.txt + sed -i '/execute_process(/,/^ endif()/s/^/#/' caffe2/CMakeLists.txt + '' + # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc' + # This lib overrode aligned_alloc, hence the error message. TL;DR: the function is linkable but not declared in the header. + + + lib.optionalString + (stdenv.hostPlatform.isDarwin && lib.versionOlder stdenv.hostPlatform.darwinSdkVersion "11.0") + '' + substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace-fail '#if (__cplusplus >= 201703L) && (!defined(__MINGW32__)) && (!defined(_MSC_VER)) + inline void *aligned_alloc(size_t align, size_t size)' '#if 0 + inline void *aligned_alloc(size_t align, size_t size)' + ''; + + # NOTE(@connorbaker): Though we do not disable Gloo or MPI when building with CUDA support, caution should be taken + # when using the different backends. Gloo's GPU support isn't great, and MPI and CUDA can't be used at the same time + # without extreme care to ensure they don't lock each other out of shared resources. + # For more, see https://github.com/open-mpi/ompi/issues/7733#issuecomment-629806195. + preConfigure = + lib.optionalString cudaSupport '' + export TORCH_CUDA_ARCH_LIST="${gpuTargetString}" + export CUPTI_INCLUDE_DIR=${lib.getDev cudaPackages.cuda_cupti}/include + export CUPTI_LIBRARY_DIR=${lib.getLib cudaPackages.cuda_cupti}/lib + '' + + lib.optionalString (cudaSupport && cudaPackages ? cudnn) '' + export CUDNN_INCLUDE_DIR=${lib.getLib cudnn}/include + export CUDNN_LIB_DIR=${cudnn.lib}/lib + '' + + lib.optionalString rocmSupport '' + export PYTORCH_ROCM_ARCH="${gpuTargetString}" + python tools/amd_build/build_amd.py + ''; + + # Use pytorch's custom configurations + dontUseCmakeConfigure = true; + + # causes possible redefinition of _FORTIFY_SOURCE + hardeningDisable = [ "fortify3" ]; + + BUILD_NAMEDTENSOR = setBool true; + BUILD_DOCS = setBool buildDocs; + + # We only do an imports check, so do not build tests either. + BUILD_TEST = setBool false; + + # ninja hook doesn't automatically turn on ninja + # because pytorch setup.py is responsible for this + CMAKE_GENERATOR = "Ninja"; + + # Whether to use C++11 ABI (or earlier).
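+ # Upstream's prebuilt wheels for Torch 2.8/2.9 are built with the C++11 ABI, which is why cxx11Abi defaults to true.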
+ _GLIBCXX_USE_CXX11_ABI = setBool cxx11Abi; + + # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for + # it by default. PyTorch currently uses its own vendored version + # of oneDNN through Intel iDeep. + USE_MKLDNN = setBool mklDnnSupport; + USE_MKLDNN_CBLAS = setBool mklDnnSupport; + + # Avoid using pybind11 from git submodule + # Also avoids pytorch exporting the headers of pybind11 + USE_SYSTEM_PYBIND11 = true; + + cmakeFlags = [ + # (lib.cmakeBool "CMAKE_FIND_DEBUG_MODE" true) + (lib.cmakeFeature "CUDAToolkit_VERSION" cudaPackages.cudaMajorMinorVersion) + ] + ++ lib.optionals cudaSupport [ + # Unbreaks version discovery in enable_language(CUDA) when wrapping nvcc with ccache + # Cf. https://gitlab.kitware.com/cmake/cmake/-/issues/26363 + (lib.cmakeFeature "CMAKE_CUDA_COMPILER_TOOLKIT_VERSION" cudaPackages.cudaMajorMinorVersion) + ]; + + preBuild = '' + export MAX_JOBS=$NIX_BUILD_CORES + ${python.pythonOnBuildForHost.interpreter} setup.py build --cmake-only + ${cmake}/bin/cmake build + ''; + + preFixup = '' + function join_by { local IFS="$1"; shift; echo "$*"; } + function strip2 { + IFS=':' + read -ra RP <<< $(patchelf --print-rpath $1) + IFS=' ' + RP_NEW=$(join_by : ''${RP[@]:2}) + patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1" + } + for f in $(find ''${out} -name 'libcaffe2*.so') + do + strip2 $f + done + ''; + + # Override the (weirdly) wrong version set by default. See + # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038 + # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267 + PYTORCH_BUILD_VERSION = version; + PYTORCH_BUILD_NUMBER = 0; + + # In-tree builds of NCCL are not supported. + # Use NCCL when cudaSupport is enabled and nccl is available. + USE_NCCL = setBool useSystemNccl; + USE_SYSTEM_NCCL = USE_NCCL; + USE_STATIC_NCCL = USE_NCCL; + + # Set the correct Python library path, broken since + # https://github.com/pytorch/pytorch/commit/3d617333e + PYTHON_LIB_REL_PATH = "${placeholder "out"}/${python.sitePackages}"; + + # Suppress a weird warning in mkl-dnn, part of ideep in pytorch + # (upstream seems to have fixed this in the wrong place?) 
+ # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc + # https://github.com/pytorch/pytorch/issues/22346 + # + # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++: + # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17 + env = { + # Builds faster without this and we don't have enough inputs that cmd length is an issue + NIX_CC_USE_RESPONSE_FILE = 0; + + NIX_CFLAGS_COMPILE = toString ( + (lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ] ++ [ "-Wno-error" ]) + ); + } + // lib.optionalAttrs rocmSupport { + AOTRITON_INSTALLED_PREFIX = rocmPackages.aotriton_0_10; + } + // lib.optionalAttrs stdenv.hostPlatform.isDarwin { + USE_MPS = 1; + } + // lib.optionalAttrs xpuSupport { + MKLROOT = xpuPackages.oneapi-torch-dev; + SYCL_ROOT = xpuPackages.oneapi-torch-dev; + }; + + nativeBuildInputs = [ + cmake + ninja + pybind11 + removeReferencesTo + which + ] + ++ lib.optionals cudaSupport ( + with cudaPackages; + [ + autoAddDriverRunpath + cuda_nvcc + ] + ) + ++ lib.optionals rocmSupport [ + rocmtoolkit_joined + rocmPackages.setupRocmHook + ] + ++ lib.optionals xpuSupport ( + with xpuPackages; + [ + ocloc + oneapi-torch-dev + ] + ); + + buildInputs = [ + blas + blas.provider + ] + ++ lib.optionals cudaSupport ( + with cudaPackages; + [ + cuda_cccl # <thrust/*> + cuda_cudart # cuda_runtime.h and libraries + cuda_cupti # For kineto + cuda_profiler_api # <cuda_profiler_api.h> + cuda_nvcc # crt/host_config.h; even though we include this in nativeBuildInputs, it's needed here too + cuda_nvml_dev # <nvml.h> + cuda_nvrtc + #cuda_nvtx # -llibNVToolsExt + nvtx + libcublas + libcufile + libcufft + libcurand + libcusolver + libcusparse + ] + ++ lists.optionals (cudaPackages ? cudnn) [ cudnn ] + ++ lists.optionals useSystemNccl [ + # Some platforms do not support NCCL (e.g., Jetson) + nccl # Provides nccl.h AND a static copy of NCCL! +
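# (That static copy is what the USE_STATIC_NCCL setting above links against.) +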
] + ) + ++ lib.optionals rocmSupport ( + with rocmPackages; + [ + composablekernel-devel + hipcub-devel + libdrm + openmp + rocmtoolkit_joined + rocprim-devel + rocthrust-devel + ] + ) + ++ lib.optionals xpuSupport ( + with xpuPackages; + [ + oneapi-torch-dev + onednn-xpu + ] + ) + ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ] + ++ lib.optionals stdenv.hostPlatform.isLinux [ numactl ] + ++ lib.optionals stdenv.hostPlatform.isDarwin [ + apple-sdk_15 + ] + ++ lib.optionals tritonSupport [ _tritonEffective ] + ++ lib.optionals MPISupport [ mpi ]; + + pythonRelaxDeps = [ + "sympy" + ]; + dependencies = [ + astunparse + expecttest + filelock + fsspec + hypothesis + jinja2 + networkx + ninja + packaging + psutil + pyyaml + requests + sympy + types-dataclasses + typing-extensions + + # the following are required for tensorboard support + pillow + six + tensorboard + protobuf + + # torch/csrc requires `pybind11` at runtime + pybind11 + ] + ++ lib.optionals (lib.versionAtLeast python.version "3.12") [ setuptools ] + ++ lib.optionals tritonSupport [ _tritonEffective ]; + + propagatedCxxBuildInputs = + [ ] ++ lib.optionals MPISupport [ mpi ] ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; + + # Tests take a long time and may be flaky, so just sanity-check imports + doCheck = false; + + pythonImportsCheck = [ "torch" ]; + + nativeCheckInputs = [ + hypothesis + ninja + psutil + ]; + + checkPhase = + with lib.versions; + with lib.strings; + concatStringsSep " " [ + "runHook preCheck" + "${python.interpreter} test/run_test.py" + "--exclude" + (concatStringsSep " " [ + "utils" # utils requires git, which is not allowed in the check phase + + # "dataloader" # psutil correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors + # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build + + # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins + (optionalString (majorMinor version == "1.3") "tensorboard") + ]) + "runHook postCheck" + ]; + + pythonRemoveDeps = [ + # In our dist-info the name is just "triton" + "pytorch-triton-rocm" + ]; + + postInstall = '' + find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${effectiveStdenv.cc} '{}' + + + mkdir $dev + cp -r $out/${python.sitePackages}/torch/include $dev/include + cp -r $out/${python.sitePackages}/torch/share $dev/share + + # Fix up library paths for split outputs + substituteInPlace \ + $dev/share/cmake/Torch/TorchConfig.cmake \ + --replace-fail \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib" + + substituteInPlace \ + $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \ + --replace-fail \''${_IMPORT_PREFIX}/lib "$lib/lib" + + mkdir $lib + mv $out/${python.sitePackages}/torch/lib $lib/lib + ln -s $lib/lib $out/${python.sitePackages}/torch/lib + '' + + lib.optionalString rocmSupport '' + substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \ + --replace-fail "\''${_IMPORT_PREFIX}/lib64" "$lib/lib" + + substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \ + --replace-fail "/build/source/torch/include" "$dev/include" + ''; + + postFixup = '' + mkdir -p "$cxxdev/nix-support" + printWords "''${propagatedCxxBuildInputs[@]}" >> "$cxxdev/nix-support/propagated-build-inputs" + '' + + lib.optionalString stdenv.hostPlatform.isDarwin '' + for f in $(ls $lib/lib/*.dylib); do +
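# Give each dylib an absolute install name under $lib so dependents can load it without @rpath lookups. +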
install_name_tool -id $lib/lib/$(basename $f) $f || true + done + + install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib + install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib + install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib + + install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib + + install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib + install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib + ''; + + # See https://github.com/NixOS/nixpkgs/issues/296179 + # + # This is a quick hack to add `libnvrtc` to the runpath so that torch can find + # it when it is needed at runtime. + extraRunpaths = lib.optionals cudaSupport [ "${lib.getLib cudaPackages.cuda_nvrtc}/lib" ]; + postPhases = lib.optionals stdenv.hostPlatform.isLinux [ "postPatchelfPhase" ]; + postPatchelfPhase = '' + while IFS= read -r -d $'\0' elf ; do + for extra in $extraRunpaths ; do + echo patchelf "$elf" --add-rpath "$extra" >&2 + patchelf "$elf" --add-rpath "$extra" + done + done < <( + find "''${!outputLib}" "$out" -type f -iname '*.so' -print0 + ) + ''; + + # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder. + requiredSystemFeatures = [ "big-parallel" ]; + + passthru = { + inherit + cudaSupport + cudaPackages + cxx11Abi + rocmSupport + rocmPackages + xpuSupport + xpuPackages + ; + cudaCapabilities = if cudaSupport then supportedCudaCapabilities else [ ]; + rocmArchs = if rocmSupport then supportedTorchRocmArchs else [ ]; + # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability. + blasProvider = blas.provider; + # To help debug when a package is broken due to CUDA support + inherit brokenConditions; + tests = callPackage ./tests.nix { }; + }; + + meta = { + changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}"; + # keep PyTorch in the description so the package can be found under that name on search.nixos.org + description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration"; + homepage = "https://pytorch.org/"; + license = lib.licenses.bsd3; + maintainers = with lib.maintainers; [ + teh + thoughtpolice + tscholak + ]; # tscholak esp. 
for darwin-related builds + platforms = + lib.platforms.linux ++ lib.optionals (!cudaSupport && !rocmSupport) lib.platforms.darwin; + broken = builtins.any trivial.id (builtins.attrValues brokenConditions); + }; +} diff --git a/pkgs/python-modules/torch/source/2_8/fix-cmake-cuda-toolkit.patch b/pkgs/python-modules/torch/source/2_8/fix-cmake-cuda-toolkit.patch new file mode 100644 index 00000000..35b6d23c --- /dev/null +++ b/pkgs/python-modules/torch/source/2_8/fix-cmake-cuda-toolkit.patch @@ -0,0 +1,51 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index c4cd4b2c2a..e983b21353 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -1319,10 +1319,6 @@ if(BUILD_SHARED_LIBS) + DIRECTORY ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) +- install( +- FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake +- DESTINATION share/cmake/Caffe2/ +- COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake + DESTINATION share/cmake/Caffe2/ +diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake +index b37036d2f2..9878b70cf7 100644 +--- a/cmake/public/cuda.cmake ++++ b/cmake/public/cuda.cmake +@@ -61,9 +61,15 @@ find_package(CUDAToolkit REQUIRED) + cmake_policy(POP) + + if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION) +- message(FATAL_ERROR "Found two conflicting CUDA versions:\n" +- "V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n" +- "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'") ++ if(CUDA_INCLUDE_DIRS IN_LIST CUDAToolkit_INCLUDE_DIR) ++ message(STATUS "CUDA_INCLUDE_DIRS is a substring of CUDAToolkit_INCLUDE_DIR. " ++ "Setting CUDA_INCLUDE_DIRS to CUDAToolkit_INCLUDE_DIR.") ++ set(CUDA_INCLUDE_DIRS "${CUDAToolkit_INCLUDE_DIR}") ++ else() ++ message(FATAL_ERROR "Found two conflicting CUDA installs:\n" ++ "V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n" ++ "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIR}'") ++ endif() + endif() + + if(NOT TARGET CUDA::nvToolsExt) +diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py +index 4d10b3db1a..48a463df59 100644 +--- a/tools/setup_helpers/cmake.py ++++ b/tools/setup_helpers/cmake.py +@@ -206,6 +206,8 @@ class CMake: + "BUILDING_WITH_TORCH_LIBS", + "CUDA_HOST_COMPILER", + "CUDA_NVCC_EXECUTABLE", ++ "CUDAToolkit_ROOT", ++ "CUDAToolkit_INCLUDE_DIR", + "CUDA_SEPARABLE_COMPILATION", + "CUDNN_LIBRARY", + "CUDNN_INCLUDE_DIR", diff --git a/pkgs/python-modules/torch/source/2_8/mkl-rpath.patch b/pkgs/python-modules/torch/source/2_8/mkl-rpath.patch new file mode 100644 index 00000000..bbb1bc2e --- /dev/null +++ b/pkgs/python-modules/torch/source/2_8/mkl-rpath.patch @@ -0,0 +1,17 @@ +diff --git a/cmake/public/mkl.cmake b/cmake/public/mkl.cmake +index 2f6d1fd905..f30464be07 100644 +--- a/cmake/public/mkl.cmake ++++ b/cmake/public/mkl.cmake +@@ -16,12 +16,6 @@ foreach(MKL_LIB IN LISTS MKL_LIBRARIES) + endif() + endforeach() + +-# TODO: This is a hack, it will not pick up architecture dependent +-# MKL libraries correctly; see https://github.com/pytorch/pytorch/issues/73008 +-set_property( +- TARGET caffe2::mkl PROPERTY INTERFACE_LINK_DIRECTORIES +- ${MKL_ROOT}/lib ${MKL_ROOT}/lib/intel64 ${MKL_ROOT}/lib/intel64_win ${MKL_ROOT}/lib/win-x64) +- + if(UNIX) + if(USE_STATIC_MKL) + foreach(MKL_LIB_PATH IN LISTS MKL_LIBRARIES) diff --git a/pkgs/python-modules/torch/source/2_8/pthreadpool-disable-gcd.diff b/pkgs/python-modules/torch/source/2_8/pthreadpool-disable-gcd.diff new file mode 100644 
index 00000000..eddd4572 --- /dev/null +++ b/pkgs/python-modules/torch/source/2_8/pthreadpool-disable-gcd.diff @@ -0,0 +1,45 @@ +diff --git a/third_party/pthreadpool/CMakeLists.txt b/third_party/pthreadpool/CMakeLists.txt +index 0db3264..1ba91c4 100644 +--- a/third_party/pthreadpool/CMakeLists.txt ++++ b/third_party/pthreadpool/CMakeLists.txt +@@ -74,9 +74,7 @@ IF(EMSCRIPTEN) + LIST(APPEND PTHREADPOOL_SRCS src/shim.c) + ELSE() + LIST(APPEND PTHREADPOOL_SRCS src/portable-api.c src/memory.c) +- IF(APPLE AND (PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "default" OR PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "gcd")) +- LIST(APPEND PTHREADPOOL_SRCS src/gcd.c) +- ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$" AND (PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "default" OR PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "event")) ++ IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$" AND (PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "default" OR PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "event")) + LIST(APPEND PTHREADPOOL_SRCS src/windows.c) + ELSE() + LIST(APPEND PTHREADPOOL_SRCS src/pthreads.c) +@@ -111,10 +109,6 @@ ELSEIF(PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "futex") + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_FUTEX=1) + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_GCD=0) + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_EVENT=0) +-ELSEIF(PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "gcd") +- TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_FUTEX=0) +- TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_GCD=1) +- TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_EVENT=0) + ELSEIF(PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "event") + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_FUTEX=0) + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_GCD=0) +diff --git a/third_party/pthreadpool/src/threadpool-common.h b/third_party/pthreadpool/src/threadpool-common.h +index ca84744..244d0ca 100644 +--- a/third_party/pthreadpool/src/threadpool-common.h ++++ b/third_party/pthreadpool/src/threadpool-common.h +@@ -14,14 +14,6 @@ + #endif + #endif + +-#ifndef PTHREADPOOL_USE_GCD +- #if defined(__APPLE__) +- #define PTHREADPOOL_USE_GCD 1 +- #else +- #define PTHREADPOOL_USE_GCD 0 +- #endif +-#endif +- + #ifndef PTHREADPOOL_USE_EVENT + #if defined(_WIN32) || defined(__CYGWIN__) + #define PTHREADPOOL_USE_EVENT 1 diff --git a/pkgs/python-modules/torch/source/2_8/pytorch-pr-108847.patch b/pkgs/python-modules/torch/source/2_8/pytorch-pr-108847.patch new file mode 100644 index 00000000..9511191c --- /dev/null +++ b/pkgs/python-modules/torch/source/2_8/pytorch-pr-108847.patch @@ -0,0 +1,31 @@ +From bf4050edab9f294a8e0060c47f906cd7a80f25a2 Mon Sep 17 00:00:00 2001 +From: Samuel Ainsworth +Date: Sat, 9 Sep 2023 02:04:09 +0000 +Subject: [PATCH] Dependencies.cmake: support building against CUPTI outside of + CUDA_SOURCE_DIR + +Limitation discovered in https://github.com/NixOS/nixpkgs/pull/249259. 
+--- + cmake/Dependencies.cmake | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 0602d534dc4c14..5f6a5f79f3e3dc 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1879,6 +1879,7 @@ if(USE_KINETO) + ${CUDA_SOURCE_DIR}/extras/CUPTI/lib64 + ${CUDA_SOURCE_DIR}/lib + ${CUDA_SOURCE_DIR}/lib64 ++ $ENV{CUPTI_LIBRARY_DIR} + NO_DEFAULT_PATH) + + find_path(CUPTI_INCLUDE_DIR cupti.h PATHS +@@ -1886,6 +1887,7 @@ if(USE_KINETO) + ${CUDA_INCLUDE_DIRS} + ${CUDA_SOURCE_DIR} + ${CUDA_SOURCE_DIR}/include ++ $ENV{CUPTI_INCLUDE_DIR} + NO_DEFAULT_PATH) + + if(CUPTI_LIBRARY_PATH AND CUPTI_INCLUDE_DIR) diff --git a/pkgs/python-modules/torch/source/2_9/0001-patch-xpu-ops-CMake.patch b/pkgs/python-modules/torch/source/2_9/0001-patch-xpu-ops-CMake.patch new file mode 100644 index 00000000..479446b0 --- /dev/null +++ b/pkgs/python-modules/torch/source/2_9/0001-patch-xpu-ops-CMake.patch @@ -0,0 +1,50 @@ +From 3b5dd2c142173a9a6afe6b21a3107e04d7fcc0ea Mon Sep 17 00:00:00 2001 +From: "Wang, Yi A" +Date: Mon, 25 Aug 2025 17:39:38 -0700 +Subject: [PATCH] patch xpu ops CMake + +Signed-off-by: Wang, Yi A +--- + cmake/BuildFlags.cmake | 1 + + cmake/Modules/FindSYCL.cmake | 6 +++++- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/cmake/BuildFlags.cmake b/cmake/BuildFlags.cmake +index ec9aaccb..8f4740df 100644 +--- a/cmake/BuildFlags.cmake ++++ b/cmake/BuildFlags.cmake +@@ -83,6 +83,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func) + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value) + set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz) ++ set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D_GLIBCXX_USE_CXX11_ABI=1) + endif() + + if(CMAKE_BUILD_TYPE MATCHES Debug) +diff --git a/cmake/Modules/FindSYCL.cmake b/cmake/Modules/FindSYCL.cmake +index 86457ba3..0feec0fa 100644 +--- a/cmake/Modules/FindSYCL.cmake ++++ b/cmake/Modules/FindSYCL.cmake +@@ -34,7 +34,8 @@ + # SYCL_ADD_EXECUTABLE + # + # SYCL_ADD_LIBRARY +- ++set(SYCL_EXTRA_INCLUDE_DIRS $ENV{SYCL_EXTRA_INCLUDE_DIRS}) ++string(REPLACE " " ";" SYCL_EXTRA_INCLUDE_DIRS "${SYCL_EXTRA_INCLUDE_DIRS}") + macro(SYCL_FIND_HELPER_FILE _name _extension) + set(_full_name "${_name}.${_extension}") + # CMAKE_CURRENT_LIST_FILE contains the full path to the file currently being +@@ -216,6 +217,9 @@ macro(SYCL_WRAP_SRCS sycl_target generated_files) + set(generated_extension ${CMAKE_${SYCL_C_OR_CXX}_OUTPUT_EXTENSION}) + + set(SYCL_include_dirs "${SYCL_INCLUDE_DIR}") ++ if(DEFINED SYCL_EXTRA_INCLUDE_DIRS) ++ list(APPEND SYCL_include_dirs ${SYCL_EXTRA_INCLUDE_DIRS}) ++ endif() + list(APPEND SYCL_include_dirs "$") + + set(SYCL_compile_definitions "$") +-- +2.43.0 + diff --git a/pkgs/python-modules/torch/source/2_9/default.nix b/pkgs/python-modules/torch/source/2_9/default.nix new file mode 100644 index 00000000..24b7c8f4 --- /dev/null +++ b/pkgs/python-modules/torch/source/2_9/default.nix @@ -0,0 +1,744 @@ +{ + stdenv, + stdenvAdapters, + lib, + fetchFromGitHub, + fetchpatch, + buildPythonPackage, + python, + config, + cudaSupport ? config.cudaSupport, + cudaPackages, + autoAddDriverRunpath, + effectiveMagma ? + if cudaSupport then + magma-cuda-static + else if rocmSupport then + magma-hip + else + magma, + effectiveStdenv ? 
if cudaSupport then cudaPackages.backendStdenv else stdenv,
+  magma,
+  magma-hip,
+  magma-cuda-static,
+  # Use the system NCCL as long as we're targeting CUDA on a supported platform.
+  useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported || rocmSupport),
+  MPISupport ? false,
+  mpi,
+  nvtx,
+  buildDocs ? false,
+  cxx11Abi ? true,
+
+  # tests.cudaAvailable:
+  callPackage,
+
+  # Native build inputs
+  cmake,
+  symlinkJoin,
+  which,
+  pybind11,
+  removeReferencesTo,
+
+  # Build inputs
+  apple-sdk_15,
+  libdrm,
+  numactl,
+
+  # dependencies
+  astunparse,
+  binutils,
+  expecttest,
+  filelock,
+  fsspec,
+  hypothesis,
+  jinja2,
+  networkx,
+  packaging,
+  psutil,
+  pyyaml,
+  requests,
+  setuptools,
+  sympy,
+  types-dataclasses,
+  typing-extensions,
+  # ROCm build and `torch.compile` requires `triton`
+  tritonSupport ? (!stdenv.hostPlatform.isDarwin),
+  triton,
+
+  # TODO: 1. callPackage needs to learn to distinguish between the task
+  #          of "asking for an attribute from the parent scope" and
+  #          the task of "exposing a formal parameter in .override".
+  # TODO: 2. We should probably abandon attributes such as `torchWithCuda` (etc.)
+  #          as they routinely end up consuming the wrong arguments
+  #          (dependencies without cuda support).
+  #          Instead we should rely on overlays and nixpkgsFun.
+  #          (@SomeoneSerge)
+  _tritonEffective ?
+    if cudaSupport then
+      triton-cuda
+    else if xpuSupport then
+      python.pkgs.triton-xpu_2_9
+    else
+      triton,
+  triton-cuda,
+
+  # Disable MKLDNN on aarch64-darwin, it negatively impacts performance;
+  # this is also what the official pytorch build does.
+  mklDnnSupport ? !(stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64),
+
+  # virtual pkg that consistently instantiates blas across nixpkgs
+  # See https://github.com/NixOS/nixpkgs/pull/83888
+  blas,
+
+  # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
+  ninja,
+
+  # dependencies for torch.utils.tensorboard
+  pillow,
+  six,
+  tensorboard,
+  protobuf,
+
+  # ROCm dependencies
+  rocmSupport ? config.rocmSupport,
+  rocmPackages,
+  xpuSupport ? (config.xpuSupport or false),
+  xpuPackages,
+  gpuTargets ? [ ],
+}:
+
+let
+  inherit (lib)
+    attrsets
+    lists
+    strings
+    trivial
+    ;
+  inherit (cudaPackages) cudnn nccl;
+  cudaFlags = cudaPackages.flags;
+
+  triton = throw "python3Packages.torch: use _tritonEffective instead of triton to avoid divergence";
+
+  setBool = v: if v then "1" else "0";
+
+  archs = (import ../../archs.nix)."2.9";
+
+  supportedTorchCudaCapabilities =
+    let
+      inherit (archs) capsPerCudaVersion;
+      real = capsPerCudaVersion."${lib.versions.majorMinor cudaPackages.cudaMajorMinorVersion}";
+      ptx = lists.map (x: "${x}+PTX") real;
+    in
+    real ++ ptx;
+
+  inherit (archs) supportedTorchRocmArchs;
+
+  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
+  # of the first list *from* the second list. That means:
+  # lists.subtractLists a b = b - a
+
+  # For CUDA
+  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
+  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;
+
+  # Use trivial.throwIf to fail evaluation when no supported GPU targets remain,
+  # listing the unsupported targets that were requested.
+  gpuArchWarner =
+    supported: unsupported:
+    trivial.throwIf (supported == [ ]) (
+      "No supported GPU targets specified. Requested GPU targets: "
+      + strings.concatStringsSep ", " unsupported
+    ) supported;
+
+  # Create the gpuTargetString.
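+  # For example, a cudaSupport build whose selected capabilities are
+  # [ "9.0" "9.0+PTX" ] yields "9.0;9.0+PTX", which is the format that
+  # TORCH_CUDA_ARCH_LIST expects in preConfigure below.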
+ gpuTargetString = strings.concatStringsSep ";" ( + if gpuTargets != [ ] then + # If gpuTargets is specified, it always takes priority. + gpuTargets + else if cudaSupport then + gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities + else if rocmSupport then + supportedTorchRocmArchs + else + throw "No GPU targets specified" + ); + + rocmtoolkit_joined = symlinkJoin { + name = "rocm-merged"; + + paths = with rocmPackages; [ + aotriton_0_11 + clr + comgr + hipblas + hipblas-common-devel + hipblaslt + hipfft + hipify-clang + hiprand + hipsolver + hipsparse + hipsparselt + hsa-rocr + miopen-hip + rccl + rocblas + rocm-core + rocm-device-libs + rocm-hip-runtime + rocm-smi-lib + rocminfo + rocrand + rocsolver + rocsparse + roctracer + ]; + + postBuild = '' + # Fix `setuptools` not being found + rm -rf $out/nix-support + + # Variables that we want to pass through to downstream derivations. + mkdir -p $out/nix-support + echo 'export ROCM_PATH="${placeholder "out"}"' >> $out/nix-support/setup-hook + echo 'export ROCM_SOURCE_DIR="${placeholder "out"}"' >> $out/nix-support/setup-hook + echo 'export CMAKE_CXX_FLAGS="-I${placeholder "out"}/include -I${placeholder "out"}/include/rocblas"' >> $out/nix-support/setup-hook + ''; + }; + + brokenConditions = attrsets.filterAttrs (_: cond: cond) { + "CUDA and ROCm are mutually exclusive" = cudaSupport && rocmSupport; + "CUDA is not targeting Linux" = cudaSupport && !stdenv.hostPlatform.isLinux; + "Unsupported CUDA version" = + cudaSupport + && !(builtins.elem cudaPackages.cudaMajorVersion [ + "12" + "13" + ]); + "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = + MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit); + # This used to be a deep package set comparison between cudaPackages and + # effectiveMagma.cudaPackages, making torch too strict in cudaPackages. + # In particular, this triggered warnings from cuda's `aliases.nix` + "Magma cudaPackages does not match cudaPackages" = + cudaSupport + && (effectiveMagma.cudaPackages.cudaMajorMinorVersion != cudaPackages.cudaMajorMinorVersion); + #"Rocm support is currently broken because `rocmPackages.hipblaslt` is unpackaged. (2024-06-09)" = + # rocmSupport; + }; + torchXpuOpsSrc = + if xpuSupport then + fetchFromGitHub { + owner = "intel"; + repo = "torch-xpu-ops"; + rev = "f8408a642da568051ab82e20f2947b89e491fbeb"; + hash = "sha256-eoT8mvaPw1NFFTYFVT6NUqOFOo4rDdNrIseF+FDpXUk="; + } + else + null; +in +buildPythonPackage rec { + pname = "torch"; + version = "2.9.0"; + pyproject = true; + + stdenv = effectiveStdenv; + + outputs = [ + "out" # output standard python package + "dev" # output libtorch headers + "lib" # output libtorch libraries + "cxxdev" # propagated deps for the cmake consumers of torch + ]; + cudaPropagateToOutput = "cxxdev"; + rocmPropagateToOutput = "cxxdev"; + + src = fetchFromGitHub { + owner = "pytorch"; + repo = "pytorch"; + tag = "v${version}"; + fetchSubmodules = true; + hash = "sha256-Jszhe67FteiSbkbUEjVIkWVUjUY8IS5qVHct4HvcfIg="; + }; + + patches = [ + ./mkl-rpath.patch + ] + ++ lib.optionals cudaSupport [ ./fix-cmake-cuda-toolkit.patch ] + ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isx86_64) [ + # pthreadpool added support for Grand Central Dispatch in April + # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO) + # that is available starting with macOS 10.13. However, our current + # base is 10.12. Until we upgrade, we can fall back on the older + # pthread support. 
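+    # The diff below drops pthreadpool's GCD code paths (src/gcd.c and the
+    # PTHREADPOOL_USE_GCD definitions) so that the portable pthreads
+    # implementation is used instead.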
+ ./pthreadpool-disable-gcd.diff + ] + ++ lib.optionals stdenv.hostPlatform.isLinux [ + # Propagate CUPTI to Kineto by overriding the search path with environment variables. + # https://github.com/pytorch/pytorch/pull/108847 + ./pytorch-pr-108847.patch + ]; + + postUnpack = lib.optionalString xpuSupport '' + cp -r --no-preserve=mode ${torchXpuOpsSrc} $sourceRoot/third_party/torch-xpu-ops + patch -d $sourceRoot/third_party/torch-xpu-ops -p1 < ${./0001-patch-xpu-ops-CMake.patch} + ''; + + postPatch = + let + pyiGenPath = "${typing-extensions}/${python.sitePackages}:${pyyaml}/${python.sitePackages}"; + in + '' + substituteInPlace pyproject.toml \ + --replace-fail "setuptools>=70.1.0,<80.0" \ + "setuptools>=70.1.0" + + substituteInPlace cmake/public/cuda.cmake \ + --replace-fail \ + 'message(FATAL_ERROR "Found two conflicting CUDA' \ + 'message(WARNING "Found two conflicting CUDA' \ + --replace-warn \ + "set(CUDAToolkit_ROOT" \ + "# Upstream: set(CUDAToolkit_ROOT" + substituteInPlace third_party/gloo/cmake/Cuda.cmake \ + --replace-warn "find_package(CUDAToolkit 7.0" "find_package(CUDAToolkit" + + # annotations (3.7), print_function (3.0), with_statement (2.6) are all supported + sed -i -e "/from __future__ import/d" **.py + #substituteInPlace third_party/NNPACK/CMakeLists.txt \ + # --replace-fail "PYTHONPATH=" 'PYTHONPATH=$ENV{PYTHONPATH}:' + # flag from cmakeFlags doesn't work, not clear why + # setting it at the top of NNPACK's own CMakeLists does + sed -i '2s;^;set(PYTHON_SIX_SOURCE_DIR ${six.src})\n;' third_party/NNPACK/CMakeLists.txt + + # Ensure that torch profiler unwind uses addr2line from nix + substituteInPlace torch/csrc/profiler/unwind/unwind.cpp \ + --replace-fail 'addr2line_binary_ = "addr2line"' 'addr2line_binary_ = "${lib.getExe' binutils "addr2line"}"' + + # gen_pyi needs typing-extensions. + #substituteInPlace torch/CMakeLists.txt \ + # --replace-fail "env PYTHONPATH=\"\''${TORCH_ROOT}\"" \ + # "env PYTHONPATH=\"\''${TORCH_ROOT}:${pyiGenPath}\"" + '' + + lib.optionalString rocmSupport '' + # https://github.com/facebookincubator/gloo/pull/297 + substituteInPlace third_party/gloo/cmake/Hipify.cmake \ + --replace-fail "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}" + + # Replace hard-coded rocm paths + substituteInPlace caffe2/CMakeLists.txt \ + --replace-fail "/opt/rocm" "${rocmtoolkit_joined}" + '' + # Detection of NCCL version doesn't work particularly well when using the static binary. + + lib.optionalString cudaSupport '' + substituteInPlace cmake/Modules/FindNCCL.cmake \ + --replace-fail \ + 'message(FATAL_ERROR "Found NCCL header version and library version' \ + 'message(WARNING "Found NCCL header version and library version' + '' + # Remove PyTorch's FindCUDAToolkit.cmake and use CMake's default. + # NOTE: Parts of pytorch rely on unmaintained FindCUDA.cmake with custom patches to support e.g. + # newer architectures (sm_90a). 
We do want to delete vendored patches, but have to keep them
+    # until https://github.com/pytorch/pytorch/issues/76082 is addressed
+    + lib.optionalString cudaSupport ''
+      rm cmake/Modules/FindCUDAToolkit.cmake
+    ''
+    + lib.optionalString xpuSupport ''
+      # replace oneapi DIR
+      substituteInPlace cmake/Modules/FindMKL.cmake \
+        --replace-fail 'SET(DEFAULT_INTEL_ONEAPI_DIR "/opt/intel/oneapi")' 'SET(DEFAULT_INTEL_ONEAPI_DIR ${xpuPackages.oneapi-torch-dev}/oneapi)'
+      # replace mkldnn build for xpu
+      sed -i '/ExternalProject_Add(xpu_mkldnn_proj/,/^ *)/s/^/#/' cmake/Modules/FindMKLDNN.cmake
+      substituteInPlace cmake/Modules/FindMKLDNN.cmake \
+        --replace-fail 'ExternalProject_Get_Property(xpu_mkldnn_proj SOURCE_DIR BINARY_DIR)' '# ExternalProject_Get_Property(xpu_mkldnn_proj SOURCE_DIR BINARY_DIR)' \
+        --replace-fail "set(XPU_MKLDNN_LIBRARIES \''${BINARY_DIR}/src/\''${DNNL_LIB_NAME})" "set(XPU_MKLDNN_LIBRARIES ${xpuPackages.onednn-xpu}/lib/libdnnl.a)" \
+        --replace-fail "set(XPU_MKLDNN_INCLUDE \''${SOURCE_DIR}/include \''${BINARY_DIR}/include)" "set(XPU_MKLDNN_INCLUDE ${xpuPackages.onednn-xpu}/include)"
+      # comment out the torch-xpu-ops git clone block in pytorch/caffe2/CMakeLists.txt
+      sed -i '/set(TORCH_XPU_OPS_REPO_URL/,/^ endif()/s/^/#/' caffe2/CMakeLists.txt
+      sed -i '/execute_process(/,/^ endif()/s/^/#/' caffe2/CMakeLists.txt
+    ''
+    # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
+    # This lib overrode aligned_alloc, hence the error message. Tl;dr: this function is linkable but not declared in the header.
+
+    + lib.optionalString
+      (stdenv.hostPlatform.isDarwin && lib.versionOlder stdenv.hostPlatform.darwinSdkVersion "11.0")
+      ''
+        substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace-fail '#if (__cplusplus >= 201703L) && (!defined(__MINGW32__)) && (!defined(_MSC_VER))
+        inline void *aligned_alloc(size_t align, size_t size)' '#if 0
+        inline void *aligned_alloc(size_t align, size_t size)'
+      '';
+
+  # NOTE(@connorbaker): Though we do not disable Gloo or MPI when building with CUDA support, caution should be taken
+  # when using the different backends. Gloo's GPU support isn't great, and MPI and CUDA can't be used at the same time
+  # without extreme care to ensure they don't lock each other out of shared resources.
+  # For more, see https://github.com/open-mpi/ompi/issues/7733#issuecomment-629806195.
+  preConfigure =
+    lib.optionalString cudaSupport ''
+      export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
+      export CUPTI_INCLUDE_DIR=${lib.getDev cudaPackages.cuda_cupti}/include
+      export CUPTI_LIBRARY_DIR=${lib.getLib cudaPackages.cuda_cupti}/lib
+    ''
+    + lib.optionalString (cudaSupport && cudaPackages ? cudnn) ''
+      export CUDNN_INCLUDE_DIR=${lib.getLib cudnn}/include
+      export CUDNN_LIB_DIR=${cudnn.lib}/lib
+    ''
+    + lib.optionalString rocmSupport ''
+      export PYTORCH_ROCM_ARCH="${gpuTargetString}"
+      python tools/amd_build/build_amd.py
+    '';
+
+  # Use pytorch's custom configurations
+  dontUseCmakeConfigure = true;
+
+  # causes possible redefinition of _FORTIFY_SOURCE
+  hardeningDisable = [ "fortify3" ];
+
+  BUILD_NAMEDTENSOR = setBool true;
+  BUILD_DOCS = setBool buildDocs;
+
+  # We only do an imports check, so do not build tests either.
+  BUILD_TEST = setBool false;
+
+  # ninja hook doesn't automatically turn on ninja
+  # because pytorch setup.py is responsible for this
+  CMAKE_GENERATOR = "Ninja";
+
+  # Whether to use C++11 ABI (or earlier).
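+  # setBool cxx11Abi feeds the _GLIBCXX_USE_CXX11_ABI definition below: "1"
+  # selects the modern libstdc++ std::string/std::list ABI, and C++ extensions
+  # linked against libtorch must be compiled with the same value. The xpu-ops
+  # patch applied in postUnpack pins -D_GLIBCXX_USE_CXX11_ABI=1 for SYCL
+  # kernels for the same reason.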
+ _GLIBCXX_USE_CXX11_ABI = setBool cxx11Abi; + + # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for + # it by default. PyTorch currently uses its own vendored version + # of oneDNN through Intel iDeep. + USE_MKLDNN = setBool mklDnnSupport; + USE_MKLDNN_CBLAS = setBool mklDnnSupport; + + # Avoid using pybind11 from git submodule + # Also avoids pytorch exporting the headers of pybind11 + USE_SYSTEM_PYBIND11 = true; + + cmakeFlags = [ + # (lib.cmakeBool "CMAKE_FIND_DEBUG_MODE" true) + (lib.cmakeFeature "CUDAToolkit_VERSION" cudaPackages.cudaMajorMinorVersion) + ] + ++ lib.optionals cudaSupport [ + # Unbreaks version discovery in enable_language(CUDA) when wrapping nvcc with ccache + # Cf. https://gitlab.kitware.com/cmake/cmake/-/issues/26363 + (lib.cmakeFeature "CMAKE_CUDA_COMPILER_TOOLKIT_VERSION" cudaPackages.cudaMajorMinorVersion) + ]; + + preBuild = '' + export MAX_JOBS=$NIX_BUILD_CORES + ${python.pythonOnBuildForHost.interpreter} setup.py build --cmake-only + ${cmake}/bin/cmake build + ''; + + preFixup = '' + function join_by { local IFS="$1"; shift; echo "$*"; } + function strip2 { + IFS=':' + read -ra RP <<< $(patchelf --print-rpath $1) + IFS=' ' + RP_NEW=$(join_by : ''${RP[@]:2}) + patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1" + } + for f in $(find ''${out} -name 'libcaffe2*.so') + do + strip2 $f + done + ''; + + # Override the (weirdly) wrong version set by default. See + # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038 + # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267 + PYTORCH_BUILD_VERSION = version; + PYTORCH_BUILD_NUMBER = 0; + + # In-tree builds of NCCL are not supported. + # Use NCCL when cudaSupport is enabled and nccl is available. + USE_NCCL = setBool useSystemNccl; + USE_SYSTEM_NCCL = USE_NCCL; + USE_STATIC_NCCL = USE_NCCL; + + # Set the correct Python library path, broken since + # https://github.com/pytorch/pytorch/commit/3d617333e + PYTHON_LIB_REL_PATH = "${placeholder "out"}/${python.sitePackages}"; + + # Suppress a weird warning in mkl-dnn, part of ideep in pytorch + # (upstream seems to have fixed this in the wrong place?) 
+ # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc + # https://github.com/pytorch/pytorch/issues/22346 + # + # Also of interest: pytorch ignores CXXFLAGS uses CFLAGS for both C and C++: + # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17 + env = { + # Builds faster without this and we don't have enough inputs that cmd length is an issue + NIX_CC_USE_RESPONSE_FILE = 0; + + NIX_CFLAGS_COMPILE = toString ( + (lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ] ++ [ "-Wno-error" ]) + ); + } + // lib.optionalAttrs rocmSupport { + AOTRITON_INSTALLED_PREFIX = rocmPackages.aotriton_0_10; + } + // lib.optionalAttrs stdenv.hostPlatform.isDarwin { + USE_MPS = 1; + } + // lib.optionalAttrs xpuSupport { + MKLROOT = xpuPackages.oneapi-torch-dev; + SYCL_ROOT = xpuPackages.oneapi-torch-dev; + }; + + nativeBuildInputs = [ + cmake + ninja + pybind11 + removeReferencesTo + which + ] + ++ lib.optionals cudaSupport ( + with cudaPackages; + [ + autoAddDriverRunpath + cuda_nvcc + ] + ) + ++ lib.optionals rocmSupport [ + rocmtoolkit_joined + rocmPackages.setupRocmHook + ] + ++ lib.optionals xpuSupport ( + with xpuPackages; + [ + ocloc + oneapi-torch-dev + ] + ); + + buildInputs = [ + blas + blas.provider + ] + ++ lib.optionals cudaSupport ( + with cudaPackages; + [ + cuda_cccl # + cuda_cudart # cuda_runtime.h and libraries + cuda_cupti # For kineto + cuda_nvcc # crt/host_config.h; even though we include this in nativeBuildInputs, it's needed here too + cuda_nvml_dev # + cuda_nvrtc + #cuda_nvtx # -llibNVToolsExt + cuda_profiler_api # + nvtx + libcublas + libcufile + libcufft + libcurand + libcusolver + libcusparse + ] + ++ lists.optionals (cudaPackages ? cudnn) [ cudnn ] + ++ lists.optionals useSystemNccl [ + # Some platforms do not support NCCL (i.e., Jetson) + nccl # Provides nccl.h AND a static copy of NCCL! 
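+        # (USE_STATIC_NCCL above makes the build link this static copy)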
+ ] + ) + ++ lib.optionals rocmSupport ( + with rocmPackages; + [ + composablekernel-devel + hipcub-devel + libdrm + openmp + rocmtoolkit_joined + rocprim-devel + rocthrust-devel + ] + ) + ++ lib.optionals xpuSupport ( + with xpuPackages; + [ + oneapi-torch-dev + onednn-xpu + ] + ) + ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ] + ++ lib.optionals stdenv.hostPlatform.isLinux [ numactl ] + ++ lib.optionals stdenv.hostPlatform.isDarwin [ + apple-sdk_15 + ] + ++ lib.optionals tritonSupport [ _tritonEffective ] + ++ lib.optionals MPISupport [ mpi ]; + + pythonRelaxDeps = [ + "sympy" + ]; + dependencies = [ + astunparse + expecttest + filelock + fsspec + hypothesis + jinja2 + networkx + ninja + packaging + psutil + pyyaml + requests + sympy + types-dataclasses + typing-extensions + + # the following are required for tensorboard support + pillow + six + tensorboard + protobuf + + # torch/csrc requires `pybind11` at runtime + pybind11 + ] + ++ lib.optionals (lib.versionAtLeast python.version "3.12") [ setuptools ] + ++ lib.optionals tritonSupport [ _tritonEffective ]; + + propagatedCxxBuildInputs = + [ ] ++ lib.optionals MPISupport [ mpi ] ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; + + # Tests take a long time and may be flaky, so just sanity-check imports + doCheck = false; + + pythonImportsCheck = [ "torch" ]; + + nativeCheckInputs = [ + hypothesis + ninja + psutil + ]; + + checkPhase = + with lib.versions; + with lib.strings; + concatStringsSep " " [ + "runHook preCheck" + "${python.interpreter} test/run_test.py" + "--exclude" + (concatStringsSep " " [ + "utils" # utils requires git, which is not allowed in the check phase + + # "dataloader" # psutils correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors + # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build + + # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins + (optionalString (majorMinor version == "1.3") "tensorboard") + ]) + "runHook postCheck" + ]; + + pythonRemoveDeps = [ + # In our dist-info the name is just "triton" + "pytorch-triton-rocm" + ]; + + postInstall = '' + find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${effectiveStdenv.cc} '{}' + + + mkdir $dev + cp -r $out/${python.sitePackages}/torch/include $dev/include + cp -r $out/${python.sitePackages}/torch/share $dev/share + + # Fix up library paths for split outputs + substituteInPlace \ + $dev/share/cmake/Torch/TorchConfig.cmake \ + --replace-fail \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib" + + substituteInPlace \ + $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \ + --replace-fail \''${_IMPORT_PREFIX}/lib "$lib/lib" + + mkdir $lib + mv $out/${python.sitePackages}/torch/lib $lib/lib + ln -s $lib/lib $out/${python.sitePackages}/torch/lib + '' + + lib.optionalString rocmSupport '' + substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \ + --replace-fail "\''${_IMPORT_PREFIX}/lib64" "$lib/lib" + + substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \ + --replace-fail "/build/source/torch/include" "$dev/include" + ''; + + postFixup = '' + mkdir -p "$cxxdev/nix-support" + printWords "''${propagatedCxxBuildInputs[@]}" >> "$cxxdev/nix-support/propagated-build-inputs" + '' + + lib.optionalString stdenv.hostPlatform.isDarwin '' + for f in $(ls $lib/lib/*.dylib); do + 
install_name_tool -id $lib/lib/$(basename $f) $f || true + done + + install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib + install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib + install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib + + install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib + + install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib + install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib + ''; + + # See https://github.com/NixOS/nixpkgs/issues/296179 + # + # This is a quick hack to add `libnvrtc` to the runpath so that torch can find + # it when it is needed at runtime. + extraRunpaths = lib.optionals cudaSupport [ "${lib.getLib cudaPackages.cuda_nvrtc}/lib" ]; + postPhases = lib.optionals stdenv.hostPlatform.isLinux [ "postPatchelfPhase" ]; + postPatchelfPhase = '' + while IFS= read -r -d $'\0' elf ; do + for extra in $extraRunpaths ; do + echo patchelf "$elf" --add-rpath "$extra" >&2 + patchelf "$elf" --add-rpath "$extra" + done + done < <( + find "''${!outputLib}" "$out" -type f -iname '*.so' -print0 + ) + ''; + + # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder. + requiredSystemFeatures = [ "big-parallel" ]; + + passthru = { + inherit + cudaSupport + cudaPackages + cxx11Abi + rocmSupport + rocmPackages + xpuSupport + xpuPackages + ; + cudaCapabilities = if cudaSupport then supportedCudaCapabilities else [ ]; + rocmArchs = if rocmSupport then supportedTorchRocmArchs else [ ]; + # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability. + blasProvider = blas.provider; + # To help debug when a package is broken due to CUDA support + inherit brokenConditions; + tests = callPackage ./tests.nix { }; + }; + + meta = { + changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}"; + # keep PyTorch in the description so the package can be found under that name on search.nixos.org + description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration"; + homepage = "https://pytorch.org/"; + license = lib.licenses.bsd3; + maintainers = with lib.maintainers; [ + teh + thoughtpolice + tscholak + ]; # tscholak esp. 
for darwin-related builds + platforms = + lib.platforms.linux ++ lib.optionals (!cudaSupport && !rocmSupport) lib.platforms.darwin; + broken = builtins.any trivial.id (builtins.attrValues brokenConditions); + }; +} diff --git a/pkgs/python-modules/torch/source/2_9/fix-cmake-cuda-toolkit.patch b/pkgs/python-modules/torch/source/2_9/fix-cmake-cuda-toolkit.patch new file mode 100644 index 00000000..35b6d23c --- /dev/null +++ b/pkgs/python-modules/torch/source/2_9/fix-cmake-cuda-toolkit.patch @@ -0,0 +1,51 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index c4cd4b2c2a..e983b21353 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -1319,10 +1319,6 @@ if(BUILD_SHARED_LIBS) + DIRECTORY ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix + DESTINATION share/cmake/Caffe2/ + COMPONENT dev) +- install( +- FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake +- DESTINATION share/cmake/Caffe2/ +- COMPONENT dev) + install( + FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake + DESTINATION share/cmake/Caffe2/ +diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake +index b37036d2f2..9878b70cf7 100644 +--- a/cmake/public/cuda.cmake ++++ b/cmake/public/cuda.cmake +@@ -61,9 +61,15 @@ find_package(CUDAToolkit REQUIRED) + cmake_policy(POP) + + if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION) +- message(FATAL_ERROR "Found two conflicting CUDA versions:\n" +- "V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n" +- "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'") ++ if(CUDA_INCLUDE_DIRS IN_LIST CUDAToolkit_INCLUDE_DIR) ++ message(STATUS "CUDA_INCLUDE_DIRS is a substring of CUDAToolkit_INCLUDE_DIR. " ++ "Setting CUDA_INCLUDE_DIRS to CUDAToolkit_INCLUDE_DIR.") ++ set(CUDA_INCLUDE_DIRS "${CUDAToolkit_INCLUDE_DIR}") ++ else() ++ message(FATAL_ERROR "Found two conflicting CUDA installs:\n" ++ "V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n" ++ "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIR}'") ++ endif() + endif() + + if(NOT TARGET CUDA::nvToolsExt) +diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py +index 4d10b3db1a..48a463df59 100644 +--- a/tools/setup_helpers/cmake.py ++++ b/tools/setup_helpers/cmake.py +@@ -206,6 +206,8 @@ class CMake: + "BUILDING_WITH_TORCH_LIBS", + "CUDA_HOST_COMPILER", + "CUDA_NVCC_EXECUTABLE", ++ "CUDAToolkit_ROOT", ++ "CUDAToolkit_INCLUDE_DIR", + "CUDA_SEPARABLE_COMPILATION", + "CUDNN_LIBRARY", + "CUDNN_INCLUDE_DIR", diff --git a/pkgs/python-modules/torch/source/2_9/mkl-rpath.patch b/pkgs/python-modules/torch/source/2_9/mkl-rpath.patch new file mode 100644 index 00000000..bbb1bc2e --- /dev/null +++ b/pkgs/python-modules/torch/source/2_9/mkl-rpath.patch @@ -0,0 +1,17 @@ +diff --git a/cmake/public/mkl.cmake b/cmake/public/mkl.cmake +index 2f6d1fd905..f30464be07 100644 +--- a/cmake/public/mkl.cmake ++++ b/cmake/public/mkl.cmake +@@ -16,12 +16,6 @@ foreach(MKL_LIB IN LISTS MKL_LIBRARIES) + endif() + endforeach() + +-# TODO: This is a hack, it will not pick up architecture dependent +-# MKL libraries correctly; see https://github.com/pytorch/pytorch/issues/73008 +-set_property( +- TARGET caffe2::mkl PROPERTY INTERFACE_LINK_DIRECTORIES +- ${MKL_ROOT}/lib ${MKL_ROOT}/lib/intel64 ${MKL_ROOT}/lib/intel64_win ${MKL_ROOT}/lib/win-x64) +- + if(UNIX) + if(USE_STATIC_MKL) + foreach(MKL_LIB_PATH IN LISTS MKL_LIBRARIES) diff --git a/pkgs/python-modules/torch/source/2_9/pthreadpool-disable-gcd.diff b/pkgs/python-modules/torch/source/2_9/pthreadpool-disable-gcd.diff new file mode 100644 
index 00000000..eddd4572 --- /dev/null +++ b/pkgs/python-modules/torch/source/2_9/pthreadpool-disable-gcd.diff @@ -0,0 +1,45 @@ +diff --git a/third_party/pthreadpool/CMakeLists.txt b/third_party/pthreadpool/CMakeLists.txt +index 0db3264..1ba91c4 100644 +--- a/third_party/pthreadpool/CMakeLists.txt ++++ b/third_party/pthreadpool/CMakeLists.txt +@@ -74,9 +74,7 @@ IF(EMSCRIPTEN) + LIST(APPEND PTHREADPOOL_SRCS src/shim.c) + ELSE() + LIST(APPEND PTHREADPOOL_SRCS src/portable-api.c src/memory.c) +- IF(APPLE AND (PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "default" OR PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "gcd")) +- LIST(APPEND PTHREADPOOL_SRCS src/gcd.c) +- ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$" AND (PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "default" OR PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "event")) ++ IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$" AND (PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "default" OR PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "event")) + LIST(APPEND PTHREADPOOL_SRCS src/windows.c) + ELSE() + LIST(APPEND PTHREADPOOL_SRCS src/pthreads.c) +@@ -111,10 +109,6 @@ ELSEIF(PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "futex") + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_FUTEX=1) + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_GCD=0) + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_EVENT=0) +-ELSEIF(PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "gcd") +- TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_FUTEX=0) +- TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_GCD=1) +- TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_EVENT=0) + ELSEIF(PTHREADPOOL_SYNC_PRIMITIVE STREQUAL "event") + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_FUTEX=0) + TARGET_COMPILE_DEFINITIONS(pthreadpool PRIVATE PTHREADPOOL_USE_GCD=0) +diff --git a/third_party/pthreadpool/src/threadpool-common.h b/third_party/pthreadpool/src/threadpool-common.h +index ca84744..244d0ca 100644 +--- a/third_party/pthreadpool/src/threadpool-common.h ++++ b/third_party/pthreadpool/src/threadpool-common.h +@@ -14,14 +14,6 @@ + #endif + #endif + +-#ifndef PTHREADPOOL_USE_GCD +- #if defined(__APPLE__) +- #define PTHREADPOOL_USE_GCD 1 +- #else +- #define PTHREADPOOL_USE_GCD 0 +- #endif +-#endif +- + #ifndef PTHREADPOOL_USE_EVENT + #if defined(_WIN32) || defined(__CYGWIN__) + #define PTHREADPOOL_USE_EVENT 1 diff --git a/pkgs/python-modules/torch/source/2_9/pytorch-pr-108847.patch b/pkgs/python-modules/torch/source/2_9/pytorch-pr-108847.patch new file mode 100644 index 00000000..9511191c --- /dev/null +++ b/pkgs/python-modules/torch/source/2_9/pytorch-pr-108847.patch @@ -0,0 +1,31 @@ +From bf4050edab9f294a8e0060c47f906cd7a80f25a2 Mon Sep 17 00:00:00 2001 +From: Samuel Ainsworth +Date: Sat, 9 Sep 2023 02:04:09 +0000 +Subject: [PATCH] Dependencies.cmake: support building against CUPTI outside of + CUDA_SOURCE_DIR + +Limitation discovered in https://github.com/NixOS/nixpkgs/pull/249259. 
+--- + cmake/Dependencies.cmake | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 0602d534dc4c14..5f6a5f79f3e3dc 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1879,6 +1879,7 @@ if(USE_KINETO) + ${CUDA_SOURCE_DIR}/extras/CUPTI/lib64 + ${CUDA_SOURCE_DIR}/lib + ${CUDA_SOURCE_DIR}/lib64 ++ $ENV{CUPTI_LIBRARY_DIR} + NO_DEFAULT_PATH) + + find_path(CUPTI_INCLUDE_DIR cupti.h PATHS +@@ -1886,6 +1887,7 @@ if(USE_KINETO) + ${CUDA_INCLUDE_DIRS} + ${CUDA_SOURCE_DIR} + ${CUDA_SOURCE_DIR}/include ++ $ENV{CUPTI_INCLUDE_DIR} + NO_DEFAULT_PATH) + + if(CUPTI_LIBRARY_PATH AND CUPTI_INCLUDE_DIR) From 2bc1a3a26ff46e8ff86aaf940b71b8c979e0577e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Wed, 19 Nov 2025 14:34:08 +0000 Subject: [PATCH 2/2] Add zlib as a dependency to aarch64-linux Torch Torch vendors libgfortran, which has zlib as a dependency. We might want to use system libgfortran in the future instead, but it requires more investigation/testing. Fixes #301. --- pkgs/python-modules/torch/binary/generic.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkgs/python-modules/torch/binary/generic.nix b/pkgs/python-modules/torch/binary/generic.nix index 8acd481c..4f4a90b0 100644 --- a/pkgs/python-modules/torch/binary/generic.nix +++ b/pkgs/python-modules/torch/binary/generic.nix @@ -22,6 +22,7 @@ cudaPackages, rocmPackages, xpuPackages, + zlib, # Python dependencies filelock, @@ -193,7 +194,9 @@ buildPythonPackage { intel-oneapi-mpi intel-pti ] - ); + ) + # Torch on aarch64-linux vendors libgfortran, which requires zlib. + ++ lib.optionals (stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isAarch64) [ zlib ]; dependencies = [ filelock