In [1]:
import numpy as np
import cupy as cp
from cupy.cuda import memory_hooks
from cupy.random import randint as RandInt
# Report the library versions this notebook was run with.
for label, module in (("numpy", np), ("cupy", cp)):
    print(label, module.__version__)

numpy 1.18.1
cupy 7.2.0


In [2]:
# Flag every element that occurs in the reference list.
mask = np.isin(np.array([1, 2, 3, 4, 5, 8, 6, 1, 1]), np.array([1, 2, 3, 5, 1]))

# Keep only the first five hits: a hit survives while the running count of
# True values is still <= 5; later hits are cleared in place.
mask &= np.cumsum(mask) <= 5
mask

array([ True,  True,  True, False,  True, False, False,  True, False])

In [31]:
from cupy.cuda import memory_hooks
# Profile the device-memory allocations made inside the `with` block; the
# report printed below attributes them to the frames of the call stack.
hook = memory_hooks.LineProfileHook()
with hook:
    # Same membership test as the NumPy cell above, but on CuPy (GPU) arrays.
    mask = cp.isin(cp.array([1,2,3,4,5,8,6,1,1]),cp.array([1,2,3,5,1]))
hook.print_report()
# No cumsum cut-off is applied here, so (unlike the NumPy cell) the last
# element of the displayed mask stays True.
mask

_root (2.50KB, 512.00B)
  c:\users\brian\appdata\local\programs\python\python38\lib\runpy.py:193:_run_module_as_main (2.50KB, 512.00B)
    c:\users\brian\appdata\local\programs\python\python38\lib\runpy.py:86:_run_code (2.50KB, 512.00B)
      c:\users\brian\appdata\local\programs\python\python38\lib\site-packages\ipykernel_launcher.py:16:<module> (2.50KB, 512.00B)
        c:\users\brian\appdata\local\programs\python\python38\lib\site-packages\traitlets\config\application.py:664:launch_instance (2.50KB, 512.00B)
          c:\users\brian\appdata\local\programs\python\python38\lib\site-packages\ipykernel\kernelapp.py:583:start (2.50KB, 512.00B)
            c:\users\brian\appdata\local\programs\python\python38\lib\site-packages\tornado\platform\asyncio.py:148:start (2.50KB, 512.00B)
              c:\users\brian\appdata\local\programs\python\python38\lib\asyncio\base_events.py:567:run_forever (2.50KB, 512.00B)
                c:\users\brian\appdata\local\programs\python\python38\lib\asyncio

array([ True,  True,  True, False,  True, False, False,  True,  True])

In [33]:

    # Trace every pool malloc/free caused by creating and then deleting a small
    # device array. The hook context is required: without it the statements run
    # silently and the malloc/free records shown below are never printed.
    with memory_hooks.DebugPrintHook():
        x = cp.array([1, 2, 3])
        del x

{"hook":"malloc","device_id":0,"size":12,"mem_size":512,"mem_ptr":47383053824,"pmem_id":"0x2402ca7c0b0"}
{"hook":"free","device_id":0,"mem_size":512,"mem_ptr":47383053824,"pmem_id":"0x2402ca7c0b0"}


In [2]:
# Handles to CuPy's default device memory pool and default pinned memory pool;
# later cells reuse both names.
mempool = cp.get_default_memory_pool()
pinned_mempool = cp.get_default_pinned_memory_pool()

# For comparison: an array created with NumPy lives on the CPU and is not
# managed by the CuPy memory pool, e.g.
#   a_cpu = np.ndarray(100, dtype=np.float32)   # a_cpu.nbytes == 400

# Pool statistics before anything has been allocated: used bytes, total bytes
# and the pinned pool's free-block count (all 0 in the recorded run).
print(
    mempool.used_bytes(),
    mempool.total_bytes(),
    pinned_mempool.n_free_blocks(),
    sep="\n",
)

0
0
0


In [3]:
from cupy.random import randint as RandInt
from cupy.cuda import memory_hooks
# Draw two random integer arrays on the GPU (7500 and 30000 values, low=1,
# high=4096*4096) while DebugPrintHook prints each pool alloc/malloc/free.
# g1 and g2 are reused by the membership-test cells further down.
with memory_hooks.DebugPrintHook():
    g1 = RandInt(1, high=4096*4096, size=7500)
    g2 = RandInt(1, high=4096*4096, size=30000)



{"hook":"alloc","device_id":0,"mem_size":512,"mem_ptr":55972986880}
{"hook":"malloc","device_id":0,"size":4,"mem_size":512,"mem_ptr":55972986880,"pmem_id":"0x11e1d192f70"}
{"hook":"alloc","device_id":0,"mem_size":30208,"mem_ptr":55972987392}
{"hook":"malloc","device_id":0,"size":30000,"mem_size":30208,"mem_ptr":55972987392,"pmem_id":"0x11e6f0195f0"}
{"hook":"alloc","device_id":0,"mem_size":7680,"mem_ptr":55973214208}
{"hook":"malloc","device_id":0,"size":7500,"mem_size":7680,"mem_ptr":55973214208,"pmem_id":"0x11e6f00f8f0"}
{"hook":"alloc","device_id":0,"mem_size":512,"mem_ptr":55973221888}
{"hook":"malloc","device_id":0,"size":1,"mem_size":512,"mem_ptr":55973221888,"pmem_id":"0x11e6f00f5b0"}
{"hook":"free","device_id":0,"mem_size":512,"mem_ptr":55973221888,"pmem_id":"0x11e6f00f5b0"}
{"hook":"free","device_id":0,"mem_size":512,"mem_ptr":55972986880,"pmem_id":"0x11e1d192f70"}
{"hook":"free","device_id":0,"mem_size":7680,"mem_ptr":55973214208,"pmem_id":"0x11e6f00f8f0"}
{"hook":"alloc","de

In [4]:
# Peek at the 7500 draws; the repr below is elided with "...".
g1

array([ 3303888,  9452227,   655191, ..., 11158492, 16475535,  3053514])

In [5]:
# Peek at the 30000 draws; the repr below is elided with "...".
g2

array([ 2606976, 12659379,   884797, ..., 10871339,  7495338, 11785720])

In [6]:
# Element type produced by RandInt (int32 in the recorded run).
g2.dtype

dtype('int32')

In [8]:
# Trace the allocations made by the GPU membership test of g1 against g2.
# The trace below contains a single 225000000-byte request, which equals
# 7500 * 30000 (g1.size * g2.size) bytes, and that buffer is freed again
# before the cell finishes.
# NOTE(review): that size is consistent with a full pairwise comparison being
# materialised as a temporary -- confirm against the cp.isin implementation.
with memory_hooks.DebugPrintHook():
    g3 = cp.isin(g1, g2)

{"hook":"alloc","device_id":0,"mem_size":225000448,"mem_ptr":55975084032}
{"hook":"malloc","device_id":0,"size":225000000,"mem_size":225000448,"mem_ptr":55975084032,"pmem_id":"0x11e32502df0"}
{"hook":"malloc","device_id":0,"size":30000,"mem_size":30208,"mem_ptr":55972987392,"pmem_id":"0x11e1d289e30"}
{"hook":"free","device_id":0,"mem_size":225000448,"mem_ptr":55975084032,"pmem_id":"0x11e32502df0"}
{"hook":"malloc","device_id":0,"size":7500,"mem_size":7680,"mem_ptr":55973214208,"pmem_id":"0x11e32502e30"}
{"hook":"free","device_id":0,"mem_size":30208,"mem_ptr":55972987392,"pmem_id":"0x11e1d289e30"}


In [9]:
# Membership mask for g1 (one entry per element of g1); repr is elided below.
g3

array([False, False, False, ..., False, False, False])

In [10]:
# The membership mask is boolean (bool in the recorded run).
g3.dtype

dtype('bool')

In [3]:
num = 100_000
x = RandInt(1, high=(4096*4096)-1, size=num)
y = RandInt(1, high=(4096*4096)-1, size=num)
bool_arr = cp.zeros(num, dtype=bool)
X = cp.asnumpy(x)
Y = cp.asnumpy(y)
#print(x)
#print(y)
%timeit -n 1 np.isin(X,Y)
print(x.dtype)
kernel = cp.ElementwiseKernel('T num,  T x,  raw T y', 'bool z',
    '''int t = 0; 
    z = 0;
    #pragma unroll
    for(t = 0; t < num; t++) z = z || (x == y[t]);''',
    'my_kernel')

%timeit -n 1  bool_arr = kernel(num, x, y)
#kernel(num, x, y)


16.7 ms ± 61 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
int32
The slowest run took 30.73 times longer than the fastest. This could mean that an intermediate result is being cached.
54.9 µs ± 102 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
# Tiny hand-checkable case: 2 and 4 occur in both arrays, 0 and 6 do not.
a = cp.array([0, 2, 4, 6], dtype=cp.int32)
b = cp.array([1, 2, 4, 8], dtype=cp.int32)
print(kernel(b.size, a, b))

[False  True  True False]


In [19]:
# Show the host-side copies of both inputs, one per line.
print(X, Y, sep="\n")

[23 56 39 ... 21 82 76]
[65 74 17 ... 68 23 54]


In [None]:
# Scratch cell (it has no execution count): writes 0 into channel 3 of image2
# wherever channel 0 of img equals channel 0 of img2, and leaves channel 3
# unchanged elsewhere.
# NOTE(review): image2, img and img2 are not defined in the visible part of
# this notebook, so the cell cannot run as it stands.
image2[:,:,3] = np.where(img[:,:,0]==img2[:,:,0], 0, image2[:,:,3]) # TODO: how can I make this work for NASA

In [22]:
# Validate the GPU kernel against NumPy's isin on the same inputs.
Z = np.isin(X,Y)          # reference result on the host
z = kernel(num, x, y)     # kernel result on the device
nz = cp.asnumpy(z)        # one bulk device->host copy

# Compare on the host in one vectorized step. Indexing the device array
# element by element (z[i]) inside a Python loop forces a separate
# device->host transfer per element.
mismatch = Z != nz
count = int(np.count_nonzero(mismatch))        # positions where the two results differ
truth = int(np.count_nonzero(mismatch & nz))   # ...of those, where the kernel said True
print ("Found ERROR", count, " FOUND", truth)

Found ERROR 0  FOUND 0


In [21]:
# Show the NumPy reference mask and the kernel's mask (host copy), one per line.
print(Z, nz, sep="\n")

[ True  True  True ...  True  True  True]
[ True  True  True ...  True  True  True]


In [10]:
# Membership test for the first 55 entries of g1, one GPU comparison against
# all of g2 per entry, while tracing the allocations each comparison makes.
with memory_hooks.DebugPrintHook():
    result = [(g2 == candidate).any() for candidate in g1[:55]]
print(result)

{"hook":"malloc","device_id":0,"size":30000,"mem_size":30208,"mem_ptr":47383052800,"pmem_id":"0x27eecc96fb0"}
{"hook":"malloc","device_id":0,"size":1,"mem_size":512,"mem_ptr":47383281152,"pmem_id":"0x27ed54582b0"}
{"hook":"free","device_id":0,"mem_size":30208,"mem_ptr":47383052800,"pmem_id":"0x27eecc96fb0"}
{"hook":"malloc","device_id":0,"size":30000,"mem_size":30208,"mem_ptr":47383052800,"pmem_id":"0x27eecdadd30"}
{"hook":"malloc","device_id":0,"size":1,"mem_size":512,"mem_ptr":47383281664,"pmem_id":"0x27eecd8f0f0"}
{"hook":"free","device_id":0,"mem_size":30208,"mem_ptr":47383052800,"pmem_id":"0x27eecdadd30"}
{"hook":"malloc","device_id":0,"size":30000,"mem_size":30208,"mem_ptr":47383052800,"pmem_id":"0x27eecd8bfb0"}
{"hook":"malloc","device_id":0,"size":1,"mem_size":512,"mem_ptr":47383282176,"pmem_id":"0x27eecd8f030"}
{"hook":"free","device_id":0,"mem_size":30208,"mem_ptr":47383052800,"pmem_id":"0x27eecd8bfb0"}
{"hook":"malloc","device_id":0,"size":30000,"mem_size":30208,"mem_ptr":47

In [11]:
# Re-read the memory pool statistics after the experiments above. Unlike the
# first reading, the device-pool figures are no longer zero.
print(mempool.used_bytes())              # 178688 in the recorded run
print(mempool.total_bytes())             # 309760 in the recorded run
print(pinned_mempool.n_free_blocks())    # still 0 in the recorded run

178688
309760
0


In [12]:
# Shell out to nvidia-smi for a snapshot of the GPU (driver/CUDA version,
# memory usage, utilization, process list).
!nvidia-smi

Sun Feb 23 19:55:44 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 441.22       Driver Version: 441.22       CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN X (Pascal)   WDDM  | 00000000:01:00.0  On |                  N/A |
| 23%   40C    P8    13W / 250W |   1605MiB / 12288MiB |      2%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  