While benchmarking a port of some NumPy code to this library, I observed a noticeable performance drop in my simple tests, with runtimes up to roughly 2x slower than the original NumPy implementation.
Before investigating further, I'd like to check whether this is expected, whether my usage is suboptimal, or whether it might be an issue worth looking into. I've attached a minimal reproducible example below.
Output of the script below (columns: N, s, c, variant, wall-clock time):
4000 250 3 Numpy inplace 0.50s
4000 250 3 Numpy standard 0.66s
4000 250 3 Array_compat 1.44s
4000 250 3 Array_compat inplace 1.32s
import numpy as np
import time
from array_api_compat import array_namespace
from copy import deepcopy
# Benchmark input: N random float32 images of shape (s, s, c), generated from
# a fixed seed so every run sees the same data.
# NOTE: 4000 * 250 * 250 * 3 float32 values is about 3 GB of RAM.
rng = np.random.default_rng(0)
N = 4000
s = 250
c = 3
# One rng.random call per image, in order, so the random stream is consumed
# exactly as an explicit append loop would consume it.
base_imgs = [rng.random((s, s, c), dtype=np.float32) for _ in range(N)]

#### Test numpy speed vs arraycompat speed
# Clipping bounds shared by every benchmark variant.
minimum = 0.001
maximum = 0.997
print("Made img list")
# Variant 1 - plain NumPy, in place: each step writes its result back into the
# input buffer via out=. Work on a deep copy so base_imgs stays unmodified for
# the other variants.
imgs = deepcopy(base_imgs)
start_time = time.perf_counter()
for data in imgs:
    np.clip(data, minimum, maximum, out=data)
    np.subtract(data, minimum, out=data)
    np.true_divide(data, maximum - minimum, out=data)
    # Any element that was clipped up to `minimum` is 0 after the subtraction,
    # so log maps it to -inf (NumPy reports this with a RuntimeWarning).
    np.log(data, out=data)
end_time = time.perf_counter()
print(N, s, c, f"Numpy inplace {end_time-start_time:.2f}s")
# Release the mutated copy before the next variant allocates its own.
del imgs
# Variant 2 - plain NumPy, out of place: each step allocates a new result
# array. Rebinding `data` only changes the loop variable; the arrays stored in
# `imgs` are not modified.
imgs = deepcopy(base_imgs)
start_time = time.perf_counter()
for data in imgs:
    data = np.clip(data, minimum, maximum)
    data = data - minimum
    data = data / (maximum - minimum)
    data = np.log(data)
end_time = time.perf_counter()
print(N, s, c, f"Numpy standard {end_time-start_time:.2f}s")
del imgs
# Variant 3 - array_api_compat namespace, out of place.
imgs = deepcopy(base_imgs)
# Resolve the namespace once, outside the timed region.
xp = array_namespace(imgs[0])

# Untimed warm-up pass on a copy of one image, so any first-call overhead in
# the namespace functions is not charged to the measurement below.
data = deepcopy(imgs[0])
data = xp.clip(data, minimum, maximum)
data = data - minimum
data = data / (maximum - minimum)
data = xp.log(data)
del data

start_time = time.perf_counter()
for data in imgs:
    # Rebinding `data` leaves the arrays stored in `imgs` unmodified.
    data = xp.clip(data, minimum, maximum)
    data = data - minimum
    data = data / (maximum - minimum)
    data = xp.log(data)
end_time = time.perf_counter()
print(N, s, c, f"Array_compat {end_time-start_time:.2f}s")
# Variant 4 - array_api_compat namespace, in place. Reuses the `imgs` list and
# the `xp` namespace created for the out-of-place array_api_compat run; that
# run only rebinds its loop variable, so the arrays are still unmodified here.
start_time = time.perf_counter()
for data in imgs:
    xp.clip(data, minimum, maximum, out=data)
    xp.subtract(data, minimum, out=data)
    xp.true_divide(data, maximum - minimum, out=data)
    xp.log(data, out=data)
end_time = time.perf_counter()
print(N, s, c, f"Array_compat inplace {end_time-start_time:.2f}s")
Hello,
While benchmarking a port of some NumPy code to this library, I observed a noticeable performance drop in my simple tests, with runtimes up to roughly 2x slower than the original NumPy implementation.
Before investigating further, I'd like to check whether this is expected, whether my usage is suboptimal, or whether it might be an issue worth looking into. I've attached a minimal reproducible example below.
Thanks!
Python 3.11
NumPy version: 2.4.6
array_api_compat version: 1.15.0
Output: see the four timing lines listed above the script.