In [1]:
from utils.multiprocessing_mkt import *
import numpy as np
import pandas as pd
import pymannkendall as mkt
import timeit

In [2]:
mkt.original_test(np.arange(10, 100))

Mann_Kendall_Test(trend='increasing', h=True, p=0.0, z=13.954947247751205, Tau=1.0, s=4005.0, var_s=82325.0, slope=1.0, intercept=10.0)

In [3]:
n = 10

arr = np.arange(n*n).reshape(n//2, n*2)

In [4]:
setup_code = """
from __main__ import arr
from utils.multiprocessing_mkt import multi_mkt
"""

test_code = """
multi_mkt(arr)
"""


num_exe = 10

time = timeit.repeat(test_code, setup=setup_code, number=num_exe, repeat=3)
time_per_execution = np.mean(np.array(time)/num_exe)

print(f"{(time_per_execution*1000):.3f} ms, for {arr.shape} observations MKT")

208.345 ms, for (5, 20) observations MKT


# Setting up params

In [5]:
grid_magnitude = 10**np.arange(3, 7)
grid_base = np.array([1, 2, 2.5, 4, 5, 7.5, 8])

num_grids = np.product(np.meshgrid(grid_base, grid_magnitude), axis=0)
num_grids = np.reshape(num_grids, np.product(num_grids.shape))
num_grids = np.array(num_grids, dtype=np.int)
num_grids

array([   1000,    2000,    2500,    4000,    5000,    7500,    8000,
         10000,   20000,   25000,   40000,   50000,   75000,   80000,
        100000,  200000,  250000,  400000,  500000,  750000,  800000,
       1000000, 2000000, 2500000, 4000000, 5000000, 7500000, 8000000])

In [6]:
obs_magnitude = 10**np.arange(1, 5)
obs_base = np.array([1, 2, 2.5, 4, 5, 7.5, 8])

num_obs = np.product(np.meshgrid(obs_base, obs_magnitude), axis=0)
num_obs = np.reshape(num_obs, np.product(num_obs.shape))
num_obs = np.array(num_obs, dtype=np.int)
num_obs

array([   10,    20,    25,    40,    50,    75,    80,   100,   200,
         250,   400,   500,   750,   800,  1000,  2000,  2500,  4000,
        5000,  7500,  8000, 10000, 20000, 25000, 40000, 50000, 75000,
       80000])

In [7]:
# filter out some values

num_grids = num_grids[(num_grids >= 5000) & (num_grids <= 100000)]
num_obs = num_obs[(num_obs > 10) & (num_obs <= 500)]

In [8]:
num_grids

array([  5000,   7500,   8000,  10000,  20000,  25000,  40000,  50000,
        75000,  80000, 100000])

In [9]:
num_obs

array([ 20,  25,  40,  50,  75,  80, 100, 200, 250, 400, 500])

# Benchmarking

In [10]:
pool_sizes = [2, 4, 6, 8]

In [11]:
dfs = []

for pool_size in pool_sizes:
    for obs_size in num_obs:
        times = []
        for num_grid in num_grids:
            arr = np.arange(obs_size)
            arr = np.repeat([arr], num_grid, axis=0)
            
            setup_code = "from __main__ import arr; from utils.multiprocessing_mkt import multi_mkt, single_mkt;"
            test_code = f"multi_mkt(arr, {pool_size})"

            num_exe = 1
            num_repeat = 1

            time = timeit.repeat(test_code, setup=setup_code, number=num_exe, repeat=num_repeat)
            time_per_execution = np.mean(np.array(time)/num_exe)
            
            times.append(time_per_execution)
            
            print(f"{(time_per_execution):10.3f} s, {arr.shape[0]:8d} {arr.shape[1]:5d}")
        df = pd.DataFrame({
            'pool_size': pool_size,
            'n': obs_size,
            'num_grids': num_grids,
            'time_s': times
        })
        dfs.append(df)
        
df = pd.concat(dfs)
df.to_csv("./csv/multiprocessing-mkt.csv", index=False)

     1.145 s,     5000    20
     1.601 s,     7500    20
     1.700 s,     8000    20
     2.073 s,    10000    20
     3.953 s,    20000    20
     4.832 s,    25000    20
     7.589 s,    40000    20
     9.515 s,    50000    20
    14.189 s,    75000    20
    15.037 s,    80000    20
    18.815 s,   100000    20
     1.262 s,     5000    25
     1.771 s,     7500    25
     1.857 s,     8000    25
     2.284 s,    10000    25
     4.402 s,    20000    25
     5.402 s,    25000    25
     8.489 s,    40000    25
    10.596 s,    50000    25
    15.902 s,    75000    25
    16.832 s,    80000    25
    21.150 s,   100000    25
     1.569 s,     5000    40
     2.232 s,     7500    40
     2.401 s,     8000    40
     2.925 s,    10000    40
     5.716 s,    20000    40
     7.102 s,    25000    40
    10.965 s,    40000    40
    13.752 s,    50000    40
    20.453 s,    75000    40
    21.801 s,    80000    40
    27.612 s,   100000    40
     1.785 s,     5000    50
     2.577 s, 

In [12]:
df

Unnamed: 0,pool_size,n,num_grids,time_s
0,2,20,5000,1.145423
1,2,20,7500,1.601490
2,2,20,8000,1.700099
3,2,20,10000,2.072802
4,2,20,20000,3.952936
...,...,...,...,...
6,8,500,40000,30.081481
7,8,500,50000,37.466961
8,8,500,75000,56.066102
9,8,500,80000,59.402026


In [13]:
dfs = []

for obs_size in num_obs:
    times = []
    for num_grid in num_grids:
        arr = np.arange(obs_size)
        arr = np.repeat([arr], num_grid, axis=0)

        setup_code = "from __main__ import arr; from utils.multiprocessing_mkt import multi_mkt, single_mkt;"
        test_code = "single_mkt(arr)"

        num_exe = 1
        num_repeat = 1

        time = timeit.repeat(test_code, setup=setup_code, number=num_exe, repeat=num_repeat)
        time_per_execution = np.mean(np.array(time)/num_exe)

        times.append(time_per_execution)

        print(f"{(time_per_execution):10.3f} s, {arr.shape[0]:8d} {arr.shape[1]:5d}")
    df = pd.DataFrame({
        'n': obs_size,
        'num_grids': num_grids,
        'time_s': times
    })
    dfs.append(df)
        
df = pd.concat(dfs)
df.to_csv("./csv/singleprocessing-mkt.csv", index=False)

     1.839 s,     5000    20
     2.733 s,     7500    20
     2.909 s,     8000    20
     3.639 s,    10000    20
     7.270 s,    20000    20
     9.106 s,    25000    20
    14.520 s,    40000    20
    18.081 s,    50000    20
    27.094 s,    75000    20
    28.825 s,    80000    20
    36.043 s,   100000    20
     2.012 s,     5000    25
     3.028 s,     7500    25
     3.229 s,     8000    25
     4.045 s,    10000    25
     8.060 s,    20000    25
    10.109 s,    25000    25
    16.151 s,    40000    25
    20.190 s,    50000    25
    30.302 s,    75000    25
    32.367 s,    80000    25
    40.403 s,   100000    25
     2.650 s,     5000    40
     3.963 s,     7500    40
     4.234 s,     8000    40
     5.281 s,    10000    40
    10.598 s,    20000    40
    13.237 s,    25000    40
    21.186 s,    40000    40
    26.509 s,    50000    40
    39.761 s,    75000    40
    42.433 s,    80000    40
    53.008 s,   100000    40
     3.063 s,     5000    50
     4.594 s, 