In [1]:
%env CUPY_ACCELERATORS = cub
import sys
import unittest

import dask.array
import dask.distributed as dd
import cupy
import xarray as xr

import src.geocat.comp.meteorology_gpu as geo_gpu
import src.geocat.comp.meteorology as geo

import numpy
import time

env: CUPY_ACCELERATORS=cub


In [2]:
#reimplemented test functions
def test_list_input(xp=cupy):
    """Check the dew-point routine when temperature is given as a Python list.

    Reads the notebook-level globals ``t_def``, ``rh_def`` and ``dt_2``.
    ``t_def`` is shifted by 273.15 (presumably degC -> K) and converted to a
    plain list; ``rh_def`` is passed through unchanged.

    Parameters
    ----------
    xp : module, optional
        Array module to use. When its ``__name__`` is ``"cupy"`` the GPU
        implementation ``geo_gpu.dewtemp_gpu`` is exercised, otherwise the
        CPU implementation ``geo.dewtemp``.

    Raises
    ------
    AssertionError
        If the result (shifted back by 273.15) is not close to ``dt_2``
        with a relative tolerance of 0.1.
    """
    kelvin_list = (xp.asarray(t_def) + 273.15).tolist()
    # Pick the implementation first so the comparison is written only once.
    dewtemp_fn = geo_gpu.dewtemp_gpu if xp.__name__ == "cupy" else geo.dewtemp
    dewpoint = dewtemp_fn(kelvin_list, rh_def) - 273.15
    assert xp.allclose(dewpoint, dt_2, rtol=0.1)

def test_numpy_input(xp=cupy):
    """Check the dew-point routine when both inputs are ``xp`` arrays.

    Reads the notebook-level globals ``t_def``, ``rh_def`` and ``dt_2``.
    ``t_def`` is shifted by 273.15 (presumably degC -> K); both inputs are
    passed through ``xp.asarray`` before the call.

    Parameters
    ----------
    xp : module, optional
        Array module to use. When its ``__name__`` is ``"cupy"`` the GPU
        implementation ``geo_gpu.dewtemp_gpu`` is exercised, otherwise the
        CPU implementation ``geo.dewtemp``.

    Raises
    ------
    AssertionError
        If the result (shifted back by 273.15) is not close to ``dt_2``
        with a relative tolerance of 0.1.
    """
    kelvin = xp.asarray(t_def) + 273.15
    humidity = xp.asarray(rh_def)
    # Pick the implementation first so the comparison is written only once.
    dewtemp_fn = geo_gpu.dewtemp_gpu if xp.__name__ == "cupy" else geo.dewtemp
    dewpoint = dewtemp_fn(kelvin, humidity) - 273.15
    assert xp.allclose(dewpoint, dt_2, rtol=0.1)

In [4]:
#Test_dewtemp
# Wall-clock comparison of test_numpy_input on the numpy and cupy back ends
# for array sizes 10**1 .. 10**7, averaged over 10 repetitions per back end.
for i in range(1,8):
    print("Array size: ", 10**i)
    for xp in [numpy, cupy]:
        # The samples are host-side Python floats, so they are kept in a host
        # (numpy) array for both back ends. Using xp.zeros here would turn
        # every store into a device write when xp is cupy and make
        # numpy.mean() depend on NumPy -> CuPy dispatch.
        repeat = numpy.zeros([10,])
        for rep in range(0,10):
            # Fresh random inputs for every repetition. t_def, rh_def and dt_2
            # are read as notebook globals by test_numpy_input.
            t_def = xp.random.uniform(low=-52,high=29.5,size=10**i)
            dt_1 = 6.3  # not referenced anywhere in this cell
            rh_def = xp.random.uniform(low=41.7,high=90.5, size=10**i)
            tk = (xp.asarray(t_def) + 273.15).tolist()
            # Reference result the test compares against, produced by the
            # same back end that is being timed.
            if(xp ==cupy):
                dt_2 = geo_gpu.dewtemp_gpu(tk, rh_def) - 273.15
            else:
                dt_2 = geo.dewtemp(tk, rh_def) - 273.15
            # Untimed first call, then wait for the device so that no
            # outstanding GPU work is attributed to the timed call.
            test_numpy_input(xp)
            cupy.cuda.runtime.deviceSynchronize()
            # perf_counter() is the monotonic, high-resolution clock meant
            # for measuring short intervals (time.time() is neither).
            t1 = time.perf_counter()
            test_numpy_input(xp)
            # Wait for the GPU to finish before stopping the clock.
            cupy.cuda.runtime.deviceSynchronize()
            t2 = time.perf_counter()
            repeat[rep] = t2-t1
        print(xp.__name__,numpy.mean(repeat), "seconds")

Array size:  10
numpy 4.92095947265625e-05 seconds
cupy 0.00031914710998535154 seconds
Array size:  100
numpy 5.8817863464355466e-05 seconds
cupy 0.0002692222595214844 seconds
Array size:  1000
numpy 7.410049438476563e-05 seconds
cupy 0.0002713918685913086 seconds
Array size:  10000
numpy 0.00023038387298583983 seconds
cupy 0.0003431558609008789 seconds
Array size:  100000
numpy 0.001743173599243164 seconds
cupy 0.0003113269805908203 seconds
Array size:  1000000
numpy 0.022282862663269044 seconds
cupy 0.0009396791458129883 seconds
Array size:  10000000
numpy 0.40211169719696044 seconds
cupy 0.009890913963317871 seconds


In [12]:
from cupyx.profiler import benchmark

# Profile geo_gpu.dewtemp_gpu with cupyx.profiler.benchmark for input sizes
# 10**1 .. 10**7, using 10 timed repetitions per size.
# NOTE(review): tk is a Python list (result of .tolist()), so every timed
# call receives a list rather than a device array - confirm that measuring
# the list-input path is what is intended here.
for i in range(1, 8):
    n_elems = 10**i
    print("Size: ", n_elems)

    # Random inputs generated on the GPU.
    t_def = cupy.random.uniform(low=-52, high=29.5, size=n_elems)
    dt_1 = 6.3
    rh_def = cupy.random.uniform(low=41.7, high=90.5, size=n_elems)
    tk = (cupy.asarray(t_def) + 273.15).tolist()

    # One untimed evaluation; its result is stored in dt_2 but is not used
    # by the benchmark call below.
    dt_2 = geo_gpu.dewtemp_gpu(tk, rh_def) - 273.15

    timing = benchmark(geo_gpu.dewtemp_gpu, (tk, rh_def), n_repeat=10)
    print(timing)

Size:  10
dewtemp_gpu         :    CPU:  236.307 us   +/- 9.846 (min:  226.734 / max:  259.846) us     GPU-0:  242.074 us   +/-10.334 (min:  231.424 / max:  267.264) us
Size:  100
dewtemp_gpu         :    CPU:  242.669 us   +/- 9.383 (min:  235.835 / max:  266.886) us     GPU-0:  248.525 us   +/- 9.929 (min:  241.664 / max:  274.432) us
Size:  1000
dewtemp_gpu         :    CPU:  328.679 us   +/-30.558 (min:  299.669 / max:  391.095) us     GPU-0:  335.462 us   +/-31.712 (min:  305.152 / max:  400.384) us
Size:  10000
dewtemp_gpu         :    CPU:  829.644 us   +/-10.828 (min:  815.865 / max:  853.634) us     GPU-0:  836.915 us   +/-10.943 (min:  823.296 / max:  861.184) us
Size:  100000
dewtemp_gpu         :    CPU: 5730.552 us   +/-86.102 (min: 5637.140 / max: 5950.808) us     GPU-0: 5737.984 us   +/-86.227 (min: 5644.288 / max: 5958.656) us
Size:  1000000
dewtemp_gpu         :    CPU:57688.194 us   +/-2569.546 (min:54997.580 / max:61841.686) us     GPU-0:58421.146 us   +/-2560.600 (m

In [None]:
# Compare test_numpy_input on the numpy and cupy back ends for array sizes
# 10**1 .. 10**7. The GPU path is timed with CUDA events, the CPU path with
# time.perf_counter(); both are reported in seconds, averaged over 10 runs.
for i in range(1,8):
    print("Size: ", 10**i)
    for xp in [numpy, cupy]:
        # Timings are host-side Python floats, so keep them in a host (numpy)
        # array for both back ends. Using xp.zeros here would turn every
        # store into a device write when xp is cupy and make numpy.mean()
        # depend on NumPy -> CuPy dispatch.
        repeat = numpy.zeros([10,])
        for rep in range(0,10):
            # Fresh random inputs for every repetition. t_def, rh_def and dt_2
            # are read as notebook globals by the test_* functions.
            t_def = xp.random.uniform(low=-52,high=29.5,size=10**i)
            dt_1 = 6.3  # not referenced anywhere in this cell
            rh_def = xp.random.uniform(low=41.7,high=90.5, size=10**i)
            tk = (xp.asarray(t_def) + 273.15).tolist()
            # Reference result the tests compare against, produced by the
            # same back end that is being timed.
            if(xp ==cupy):
                dt_2 = geo_gpu.dewtemp_gpu(tk, rh_def) - 273.15
            else:
                dt_2 = geo.dewtemp(tk, rh_def) - 273.15
            # Untimed call. NOTE(review): this runs the list-input test while
            # the timed call below is test_numpy_input - confirm intended.
            test_list_input(xp)
            if(xp == cupy):
                # Bracket the call with CUDA events and wait on the end
                # event before reading the elapsed time.
                start_gpu = cupy.cuda.Event()
                end_gpu = cupy.cuda.Event()
                start_gpu.record()
                test_numpy_input(xp)
                end_gpu.record()
                end_gpu.synchronize()
                t_gpu = cupy.cuda.get_elapsed_time(start_gpu, end_gpu)
                # get_elapsed_time reports milliseconds; convert to seconds
                # so both back ends are printed in the same unit.
                t_gpu = t_gpu *0.001
                repeat[rep] = t_gpu
            elif(xp == numpy):
                start_cpu = time.perf_counter()
                test_numpy_input(xp)
                end_cpu = time.perf_counter()
                t_cpu = end_cpu - start_cpu
                repeat[rep] = t_cpu
        print(xp.__name__,numpy.mean(repeat))


Size:  10
numpy 4.7510117292404173e-05
cupy 0.0003421087950468063
Size:  100
numpy 5.8483262546360495e-05
cupy 0.0005542976021766663
Size:  1000
numpy 7.12360953912139e-05
cupy 0.00029066240489482884
Size:  10000
numpy 0.00019392385147511959
cupy 0.0003050399988889695
Size:  100000
numpy 0.001798791321925819
cupy 0.0003414719969034195
Size:  1000000
numpy 0.02238680743612349
cupy 0.001039027202129364
Size:  10000000


In [8]:
# NOTE(review): the output recorded for this cell is a 10-element array and
# the execution count (In [8]) is out of order with the cells above, so the
# saved output may come from a different expression - confirm what this cell
# is meant to display, or remove it.
cupy.free

array([ 993.2678833 , 1121.49035645,  981.06970215, 1001.98492432,
        957.3192749 ,  958.70098877,  978.659729  , 1005.62792969,
        990.78210449,  949.07855225])