In [1]:
import timeit
from pathlib import Path

import numpy as np
import pandas as pd
import pymannkendall as mkt

# Timeit

https://docs.python.org/3/library/timeit.html#timeit.Timer

- `timeit` reports the total time taken to run a statement `number` times, in **seconds**


In [2]:
# Sanity check on a strictly increasing series 1..n before any timing is done.
# `n` and `a` are reused by the timing cell that follows.
n = 100
a = np.arange(n) + 1

mkt.original_test(a)

Mann_Kendall_Test(trend='increasing', h=True, p=0.0, z=14.73869998208331, Tau=1.0, s=4950.0, var_s=112750.0, slope=1.0, intercept=1.0)

In [3]:
# Setup and statement are handed to timeit as source strings; the setup
# imports the sample array `a` from `__main__` and the library under test.
setup_code = """
from __main__ import a
import pymannkendall as mkt
"""

test_code = """
mkt.original_test(a)
"""

num_exe = 1000

# Three repeats; each entry of `time` is the total seconds for `num_exe` calls.
time = timeit.repeat(stmt=test_code, setup=setup_code, repeat=3, number=num_exe)

# Convert each total to seconds-per-call, then average over the repeats.
per_call = np.array(time) / num_exe
time_per_execution = per_call.mean()

print(f"{(time_per_execution*1000):.3f} ms, for {n} observations MKT")

1.033 ms, for 100 observations MKT


# Benchmarking MKT

- let's measure how the run time of `mkt.original_test` changes with the number of observations

In [4]:
# Generate the observation sizes to benchmark: every `base` multiplier at
# every power of ten from 10**1 to 10**5, in ascending order.

magnitude = 10 ** np.arange(1, 6)
base = np.array([1, 2, 2.5, 4, 5, 7.5, 8])

# Broadcasting a (5, 1) column against the (7,) row gives the full 5x7 grid of
# products; ravel() flattens it row by row, so sizes come out sorted.
# The builtin `int` is used because the `np.int` alias (and `np.product`)
# no longer exist in current NumPy releases.
num_obs = (magnitude[:, np.newaxis] * base).ravel().astype(int)
num_obs

array([    10,     20,     25,     40,     50,     75,     80,    100,
          200,    250,    400,    500,    750,    800,   1000,   2000,
         2500,   4000,   5000,   7500,   8000,  10000,  20000,  25000,
        40000,  50000,  75000,  80000, 100000, 200000, 250000, 400000,
       500000, 750000, 800000])

In [5]:
# Keep only sizes up to the cap; anything larger is dropped from the benchmark.
# NOTE(review): the recorded output of the timing loop still lists sizes up to
# 100000, so that output appears to predate this filter -- re-run to confirm.
MAX_OBS = 40000
num_obs = num_obs[num_obs <= MAX_OBS]

In [6]:
# Benchmark mkt.original_test for every size in `num_obs`.
# `times` collects the mean seconds-per-call for each size, in the same order
# as `num_obs`.
times = []

# Seeded generator so that every run benchmarks exactly the same input series.
rng = np.random.default_rng(42)

# The timeit source strings do not depend on the size, so build them once.
# The setup imports the current `arr` from `__main__` on every timeit call.
setup_code = "from __main__ import arr; import pymannkendall as mkt"
test_code = "mkt.original_test(arr)"

for size in num_obs:
    # Ramp 1..size plus uniform noise in [0, 1) on each value.
    # Builtin `float` is used because the `np.float` alias no longer exists
    # in current NumPy releases.
    arr = np.arange(1, size + 1, dtype=float)
    arr += rng.random(size)

    if size >= 2000:
        # Large arrays take too long to run, so use far fewer executions.
        num_exe = 5
        num_repeat = 2
    else:
        num_exe = 100
        num_repeat = 5

    # Each entry of `time` is the total seconds for `num_exe` calls.
    time = timeit.repeat(test_code, setup=setup_code, number=num_exe, repeat=num_repeat)
    time_per_execution = np.mean(np.array(time) / num_exe)

    times.append(time_per_execution)

    print(f"{size:<10d} {(time_per_execution*1000):>10.3f} ms")

10              0.286 ms
20              0.363 ms
25              0.399 ms
40              0.522 ms
50              0.605 ms
75              0.818 ms
80              0.852 ms
100             1.035 ms
200             2.035 ms
250             2.581 ms
400             4.329 ms
500             6.048 ms
750            10.798 ms
800            11.410 ms
1000           16.440 ms
2000           46.208 ms
2500           70.785 ms
4000          163.300 ms
5000          270.219 ms
7500          459.111 ms
8000          599.967 ms
10000         803.979 ms
20000        3661.005 ms
25000        4936.812 ms
40000       12999.940 ms
50000       23054.666 ms
75000      111629.899 ms
80000      162055.071 ms
100000     403986.622 ms


In [7]:
# Tabulate the benchmark: one row per size, with the per-call time converted
# from seconds to milliseconds and rounded to 4 decimals.
df = pd.DataFrame(
    {
        'n': num_obs,
        'time_ms': (np.array(times) * 1000).round(4),
    }
)

df

Unnamed: 0,n,time_ms
0,10,0.2863
1,20,0.3627
2,25,0.399
3,40,0.522
4,50,0.6045
5,75,0.8176
6,80,0.8521
7,100,1.0349
8,200,2.035
9,250,2.5814


In [8]:
# Save the benchmark table. The target folder is created first so that a
# fresh checkout without ./csv does not fail on a missing directory.
csv_path = Path("./csv/MKT_time_vs_n.csv")
csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path, index=False)