Related: https://superfastpython.com/benchmark-fastest-way-to-copy-numpy-array/

Conclusion of this notebook:
* All copy methods are about the same speed.
* Copying to shared memory is as fast as a regular copy (expected, as it's also RAM).
* Copy is already (maximally) parallelized, so starting threads doesn't help.
* My laptop can copy at about 16 GB/s.

In [None]:
import numpy as np

# Shape shared by every benchmark buffer: (2208 * 1242) rows of 3 float32
# values (presumably one XYZ point per pixel of a 2208 x 1242 frame -- confirm).
POINT_CLOUD_SHAPE = (2208 * 1242, 3)

# Seeded generator so the buffers are reproducible across kernel restarts.
rng = np.random.default_rng(0)

# Fill every buffer with real data instead of np.zeros: zero-initialised
# memory may be handed out lazily by the OS, in which case a copy that only
# ever *reads* such a source need not touch distinct physical pages and the
# benchmark can look faster than a copy of real data would be. Writing random
# values forces each buffer to be fully materialised before anything is timed.
(
    point_cloud_0,
    point_cloud_1,
    point_cloud_2,
    point_cloud_3,
    point_cloud_4,
    point_cloud_5,
) = (rng.random(POINT_CLOUD_SHAPE, dtype=np.float32) for _ in range(6))

In [None]:
from multiprocessing import shared_memory

# Create the named shared-memory segment that backs point_cloud_6.
# The name is fixed, so running this cell a second time (or after a previous
# kernel left the segment behind) would raise FileExistsError; in that case
# attach to the existing segment instead, after checking it is large enough.
try:
    point_cloud_shm = shared_memory.SharedMemory(
        create=True, size=point_cloud_0.nbytes, name="point_cloud"
    )
except FileExistsError:
    point_cloud_shm = shared_memory.SharedMemory(name="point_cloud")
    if point_cloud_shm.size < point_cloud_0.nbytes:
        raise ValueError(
            f"Existing shared memory 'point_cloud' holds {point_cloud_shm.size} bytes, "
            f"but {point_cloud_0.nbytes} bytes are required."
        )

# View the segment as an array with the same layout as point_cloud_0, so the
# shape/dtype cannot drift from the nbytes used to size the segment.
# When finished with the benchmark, release it with
# `del point_cloud_6; point_cloud_shm.close(); point_cloud_shm.unlink()`.
point_cloud_6 = np.ndarray(
    point_cloud_0.shape, dtype=point_cloud_0.dtype, buffer=point_cloud_shm.buf
)


In [None]:
# Size of one buffer: 2208 * 1242 rows x 3 values x 4 bytes = 32,908,032 bytes.
point_cloud_0.nbytes # 32 MB

In [None]:
%%timeit
# Method 1: ndarray.copy(). This returns a brand-new array each time, so the
# timing covers allocating the destination as well as copying the data.
point_cloud_1 = point_cloud_0.copy()

In [None]:
%%timeit
# Method 2: slice assignment. Writes the values of point_cloud_0 into the
# already-allocated point_cloud_1 buffer instead of creating a new array.
point_cloud_1[:] = point_cloud_0[:]

In [None]:
%%timeit
# Three independent slice-assignment copies executed one after another.
# The same three source/destination pairs are used by the threaded cell
# further down, so this is the sequential figure to compare it against.
point_cloud_1[:] = point_cloud_0[:]
point_cloud_3[:] = point_cloud_2[:]
point_cloud_5[:] = point_cloud_4[:]

In [None]:
import threading
import copy
from loguru import logger

def copy_values(a: np.ndarray, b: np.ndarray) -> None:
    """Copy the values of ``a`` into the existing buffer of ``b``.

    The copy is done in place with slice assignment, so the caller's ``b``
    (for example an array backed by shared memory) receives the data.

    Note that alternatives such as ``b = a.copy()``, ``b = np.copy(a)`` or
    ``b = copy.deepcopy(a)`` are *not* drop-in replacements inside this
    function: they only rebind the local name ``b`` to a new array and leave
    the caller's array untouched. They copy the same amount of data (the
    original author measured about 2 ms for a 32 MB array for all variants),
    but the result would be discarded when the function returns.

    Args:
        a: Source array to read from.
        b: Destination array to overwrite; must be assignable from ``a``.

    Returns:
        None. ``b`` is modified in place.
    """
    b[:] = a[:]

In [None]:
%%timeit
t1 = threading.Thread(target=lambda: copy_values(point_cloud_0, point_cloud_1))
t2 = threading.Thread(target=lambda: copy_values(point_cloud_2, point_cloud_3))
t3 = threading.Thread(target=lambda: copy_values(point_cloud_4, point_cloud_5))


t1.start()
t2.start()
t3.start()

t1.join()
t2.join()
t3.join()

In [None]:
%%timeit
# Same slice-assignment copy, but the destination point_cloud_6 is the array
# built on top of the shared-memory buffer (point_cloud_shm.buf).
point_cloud_6[:] = point_cloud_0[:]

In [None]:
%%timeit
# Starts a background thread that copies into the shared-memory array, but
# does not wait for it: with the join below commented out, the timed region
# ends as soon as start() returns.
# NOTE(review): this therefore appears to measure thread launch cost rather
# than copy time, and threads from earlier timeit iterations may still be
# copying while later ones are timed -- confirm this is intended, otherwise
# re-enable the join.
t1 = threading.Thread(target=lambda: copy_values(point_cloud_0, point_cloud_6))
t1.start()
# t1.join()

In [None]:
import time
# Sustained-throughput measurement: copy point_cloud_0 into the shared-memory
# array repeatedly for a fixed wall-clock budget and count the copies.
DURATION_S = 10  # how long to keep copying, in seconds

n_copies = 0
# perf_counter() is monotonic and high-resolution, so the measurement cannot
# be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
deadline = start_time + DURATION_S

while time.perf_counter() < deadline:
    copy_values(point_cloud_0, point_cloud_6)
    n_copies += 1

# The loop only stops after the copy that crosses the deadline has finished,
# so divide by the time actually spent rather than the nominal budget.
elapsed_s = time.perf_counter() - start_time

print(f"Number of copies: {n_copies} (32 MB array, {DURATION_S} seconds)")
print(f"Speed in GB/s: {n_copies * point_cloud_0.nbytes / 1e9 / elapsed_s:.2f} GB/s")