From 137c70251fa11aee14c2bdeecf7783e2200275e2 Mon Sep 17 00:00:00 2001
From: Aleksei Nikiforov
Date: Fri, 21 Nov 2025 15:25:57 +0100
Subject: [PATCH 1/3] Fix convert_hf_to_gguf.py script on s390x

Assume converted model data is originally little-endian. On s390x,
byteswap the data after reading it so that values are in the correct
representation for any transformation that follows, such as computing
weight tensors. Then byteswap the data back to little-endian before
passing it to GGUFWriter; GGUFWriter byteswaps it to big-endian again
if big-endian output is requested.

byteswap(inplace=True) calls don't work with lazy tensor and array
wrappers, so byteswap into a copy to work around this behaviour.
---
 convert_hf_to_gguf.py       | 33 ++++++++++++++++++++++++++++++++-
 gguf-py/gguf/gguf_writer.py |  6 ++++--
 2 files changed, 36 insertions(+), 3 deletions(-)
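
As an illustration of the flow described above (not part of the patch): a
minimal sketch assuming plain numpy, with a hypothetical load_le_tensor()
helper. The file bytes are little-endian; they are viewed as the native dtype
after reading, and on big-endian hosts they are byteswapped into a copy, since
np.frombuffer over immutable bytes yields a read-only array that, much like
the lazy wrappers mentioned above, cannot be swapped in place.

    import sys
    import numpy as np

    def load_le_tensor(raw: bytes, dtype: type = np.float32) -> np.ndarray:
        # The file stores values little-endian; view the bytes as the native
        # dtype and swap on big-endian hosts so the values come out right.
        data = np.frombuffer(raw, dtype=np.uint8).view(dtype)
        if sys.byteorder == 'big':
            # byteswap into a copy rather than in place
            data = data.byteswap(inplace=False)
        return data

    # prints [0. 1. 2. 3.] on both little- and big-endian hosts
    print(load_le_tensor(np.arange(4, dtype='<f4').tobytes()))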

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index cc77a3db273..d03f458af16 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -615,6 +615,12 @@ def prepare_tensors(self):
                 # reverse shape to make it similar to the internal ggml dimension order
                 shape_str = f"{{{', '.join(str(n) for n in reversed(shape))}}}"

+                if sys.byteorder == 'big':
+                    # Switch data back to little-endian.
+                    # gguf_writer.add_tensor later switches it back to big endian if needed.
+                    # Don't byteswap inplace since it cannot handle lazy copies
+                    data = data.byteswap(inplace=False)
+
                 # n_dims is implicit in the shape
                 logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")

@@ -10039,6 +10045,25 @@ class LazyTorchTensor(gguf.LazyBase):
         torch.uint8: np.uint8,
     }

+    # only used when byteswapping data. Only correct size is needed
+    _dtype_byteswap_map: dict[torch.dtype, type] = {
+        torch.float64: np.float64,
+        torch.float32: np.float32,
+        torch.bfloat16: np.float16,
+        torch.float16: np.float16,
+        torch.int64: np.int64,
+        torch.uint64: np.uint64,
+        torch.int32: np.int32,
+        torch.uint32: np.uint32,
+        torch.int16: np.int16,
+        torch.uint16: np.uint16,
+        torch.int8: np.int8,
+        torch.uint8: np.uint8,
+        torch.bool: np.uint8,
+        torch.float8_e4m3fn: np.uint8,
+        torch.float8_e5m2: np.uint8,
+    }
+
     # used for safetensors slices
     # ref: https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/src/lib.rs#L1046
     # TODO: uncomment U64, U32, and U16, ref: https://github.com/pytorch/pytorch/issues/58734
@@ -10082,8 +10107,14 @@ def from_safetensors_slice(cls, st_slice: Any) -> Tensor:
     @classmethod
     def from_local_tensor(cls, t: gguf.utility.LocalTensor) -> Tensor:
         def load_tensor(tensor: gguf.utility.LocalTensor) -> Tensor:
+            def byteswap_tensor(tensor: np.ndarray, dtype: type) -> np.ndarray:
+                if sys.byteorder == 'big':
+                    # switch data back to big endian
+                    tensor = tensor.view(dtype).byteswap(inplace=False)
+                return tensor
             dtype = cls._dtype_str_map[tensor.dtype]
-            return torch.from_numpy(tensor.mmap_bytes()).view(dtype).reshape(tensor.shape)
+            numpy_dtype = cls._dtype_byteswap_map[dtype]
+            return torch.from_numpy(byteswap_tensor(tensor.mmap_bytes(), numpy_dtype)).view(dtype).reshape(tensor.shape)
         dtype = cls._dtype_str_map[t.dtype]
         shape = t.shape
         lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(t,), func=lambda r: load_tensor(r))
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index a051daeeb13..b25c180b33e 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -373,7 +373,8 @@ def add_tensor(
         raw_dtype: GGMLQuantizationType | None = None,
     ) -> None:
         if self.endianess == GGUFEndian.BIG:
-            tensor.byteswap(inplace=True)
+            # Don't byteswap inplace since lazy copies cannot handle it
+            tensor = tensor.byteswap(inplace=False)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
             fp.seek(0)
@@ -400,7 +401,8 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
         assert self.fout is not None

         if self.endianess == GGUFEndian.BIG:
-            tensor.byteswap(inplace=True)
+            # Don't byteswap inplace since lazy copies cannot handle it
+            tensor = tensor.byteswap(inplace=False)

         file_id = -1
         for i, tensors in enumerate(self.tensors):

From ed94707182d719c000ac93aa3821e0464bcf33ee Mon Sep 17 00:00:00 2001
From: Aleksei Nikiforov
Date: Fri, 21 Nov 2025 15:43:21 +0100
Subject: [PATCH 2/3] Make GGUFWriter accept tensors in native endianness
 instead of little-endian

With this change, if no byteswapping is actually needed, two redundant
byteswaps can be omitted on s390x.
---
 convert_hf_to_gguf.py       | 6 ------
 gguf-py/gguf/gguf_writer.py | 7 +++++--
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index d03f458af16..498ac3316d7 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -615,12 +615,6 @@ def prepare_tensors(self):
                 # reverse shape to make it similar to the internal ggml dimension order
                 shape_str = f"{{{', '.join(str(n) for n in reversed(shape))}}}"

-                if sys.byteorder == 'big':
-                    # Switch data back to little-endian.
-                    # gguf_writer.add_tensor later switches it back to big endian if needed.
-                    # Don't byteswap inplace since it cannot handle lazy copies
-                    data = data.byteswap(inplace=False)
-
                 # n_dims is implicit in the shape
                 logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")

diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index b25c180b33e..07a763a1cf8 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -4,6 +4,7 @@
 import os
 import shutil
 import struct
+import sys
 import tempfile
 from dataclasses import dataclass
 from enum import Enum, auto
@@ -372,7 +373,8 @@ def add_tensor(
         self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
         raw_dtype: GGMLQuantizationType | None = None,
     ) -> None:
-        if self.endianess == GGUFEndian.BIG:
+        if (self.endianess == GGUFEndian.BIG and sys.byteorder != 'big') or \
+           (self.endianess == GGUFEndian.LITTLE and sys.byteorder != 'little'):
             # Don't byteswap inplace since lazy copies cannot handle it
             tensor = tensor.byteswap(inplace=False)
         if self.use_temp_file and self.temp_file is None:
@@ -400,7 +402,8 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
             raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
         assert self.fout is not None

-        if self.endianess == GGUFEndian.BIG:
+        if (self.endianess == GGUFEndian.BIG and sys.byteorder != 'big') or \
+           (self.endianess == GGUFEndian.LITTLE and sys.byteorder != 'little'):
             # Don't byteswap inplace since lazy copies cannot handle it
             tensor = tensor.byteswap(inplace=False)
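
The two-clause condition introduced above amounts to "swap only when the
requested output endianness differs from the host". A standalone sketch of
that check (illustration only, with a stand-in GGUFEndian; the real enum
lives in gguf-py/gguf/constants.py):

    import sys
    from enum import IntEnum
    import numpy as np

    class GGUFEndian(IntEnum):  # stand-in for gguf.constants.GGUFEndian
        LITTLE = 0
        BIG = 1

    def maybe_byteswap(tensor: np.ndarray, endianess: GGUFEndian) -> np.ndarray:
        wants_big = endianess == GGUFEndian.BIG
        host_is_big = sys.byteorder == 'big'
        if wants_big != host_is_big:
            # Requested output endianness differs from the host: swap into a
            # copy, since lazy tensors cannot be byteswapped in place.
            tensor = tensor.byteswap(inplace=False)
        return tensor

On a little-endian host this is equivalent to the previous
"endianess == GGUFEndian.BIG" check, so little-endian conversion keeps its
old behaviour; on s390x it is what allows the two redundant swaps to be
skipped when big-endian output is requested.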

From 9372b108a7611183cfb7c3b11d54f28a9ef2cfbf Mon Sep 17 00:00:00 2001
From: Aleksei Nikiforov
Date: Mon, 24 Nov 2025 15:22:35 +0100
Subject: [PATCH 3/3] Fix byteswapping in convert_hf_to_gguf.py for remote
 models

---
 convert_hf_to_gguf.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 498ac3316d7..773031c4328 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -10116,10 +10116,16 @@ def byteswap_tensor(tensor: np.ndarray, dtype: type) -> np.ndarray:

     @classmethod
     def from_remote_tensor(cls, remote_tensor: gguf.utility.RemoteTensor):
+        def byteswap_tensor(tensor: np.ndarray, dtype: type) -> np.ndarray:
+            if sys.byteorder == 'big':
+                # switch data back to big endian
+                tensor = tensor.view(dtype).byteswap(inplace=False)
+            return tensor
         dtype = cls._dtype_str_map[remote_tensor.dtype]
+        numpy_dtype = cls._dtype_byteswap_map[dtype]
         shape = remote_tensor.shape
         meta = cls.meta_with_dtype_and_shape(dtype, shape)
-        lazy = cls(meta=meta, args=(remote_tensor,), func=lambda r: torch.frombuffer(r.data(), dtype=dtype).reshape(shape))
+        lazy = cls(meta=meta, args=(remote_tensor,), func=lambda r: torch.from_numpy(byteswap_tensor(np.frombuffer(r.data(), dtype=numpy_dtype), numpy_dtype)).view(dtype).reshape(shape))
         return cast(torch.Tensor, lazy)

     @classmethod
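
For remote tensors the same round trip applies, sketched below as a
standalone illustration (remote_bytes_to_tensor() is a hypothetical helper,
not part of the patch). The downloaded bytes are little-endian, the numpy
dtype used for the swap only needs the right element size (which is why
np.float16 can stand in for torch.bfloat16 in _dtype_byteswap_map), and the
swapped copy is then reinterpreted and reshaped by torch:

    import sys
    import numpy as np
    import torch

    def remote_bytes_to_tensor(raw: bytes, torch_dtype: torch.dtype,
                               swap_dtype: type, shape: tuple) -> torch.Tensor:
        # swap_dtype only has to match the element size of torch_dtype
        data = np.frombuffer(raw, dtype=swap_dtype)
        if sys.byteorder == 'big':
            # downloaded data is little-endian; swap into a copy so the bytes
            # are in host order before torch reinterprets them
            data = data.byteswap(inplace=False)
        return torch.from_numpy(data).view(torch_dtype).reshape(shape)

    raw = np.arange(4, dtype='<f2').tobytes()  # fake little-endian float16 payload
    t = remote_bytes_to_tensor(raw, torch.float16, np.float16, (2, 2))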