In [None]:
import dask
import numpy as np
from scipy.stats import pearsonr

## Shuffling with dask.delayed
@dask.delayed
def shuffle_data(ar):
    """Circularly shift ``ar`` by a random offset (lazy dask task).

    The offset is drawn from NumPy's global RNG as an integer in
    ``[0, ar.shape[0])``, so an offset of 0 (no shift) is possible.

    Parameters
    ----------
    ar : numpy.ndarray
        Array to shift; its first-axis length bounds the random offset.

    Returns
    -------
    numpy.ndarray
        A rolled copy of ``ar``; the input itself is not modified.
    """
    n_samples = ar.shape[0]
    offset = np.random.randint(0, n_samples)
    # No axis is passed to np.roll, matching the 1-D usage in this notebook.
    return np.roll(ar, offset)

@dask.delayed
def return_pearsonr(one, two):
    """Pearson correlation coefficient between ``one`` and ``two`` (lazy dask task).

    Only the first element of ``scipy.stats.pearsonr``'s result (the r value)
    is returned; the second element (the p-value) is discarded.
    """
    r_value, _p_value = pearsonr(one, two)
    return r_value

@dask.delayed
def combine_shuffled_r_vals(cor, nshuffles, shuffle):
    """Place correlation value(s) into a fresh zero-filled result vector (lazy dask task).

    Parameters
    ----------
    cor : float or sequence of float
        Value(s) to store.
    nshuffles : int
        Length of the output vector.
    shuffle : int or array-like of int
        Index (or indices) at which ``cor`` is written.

    Returns
    -------
    numpy.ndarray
        Vector of length ``nshuffles`` that is zero everywhere except at
        ``shuffle``. A new vector is allocated on every call, so results from
        separate calls are not accumulated.
    """
    r_vals = np.zeros(nshuffles)
    r_vals[shuffle] = cor
    return r_vals

## Set seed for reproducibility
# Seeds NumPy's global RNG, which every np.random.* call in this notebook draws from.
# NOTE(review): tasks executed by dask may consume this RNG in a nondeterministic
# order — confirm whether the delayed path is actually reproducible.
np.random.seed(24601)
# Number of random circular shifts used to build the distribution of r values.
nshuffles = 5000
# Length of each synthetic array generated below.
ar_size = 2000

In [None]:
## Create two arrays to correlate
# Both are independent normal draws of length ar_size from NumPy's global RNG.
# `ar` is the series that gets circularly shifted; `stable_ar` is left untouched.
ar = np.random.normal(size=ar_size)
stable_ar = np.random.normal(size=ar_size)

In [None]:
## Classical way of shuffling
# Serial baseline: one random circular shift of `ar` per iteration, correlated
# against the unshifted `stable_ar`; the r value is stored at that iteration's index.
n_samples = ar.shape[0]
shuffled_ar = np.zeros(nshuffles)
for idx in range(nshuffles):
    rand_shift = np.random.randint(0, n_samples)
    rolled = np.roll(ar, rand_shift)
    shuffled_ar[idx] = pearsonr(rolled, stable_ar)[0]

In [None]:
## Faster shuffling with dask?
# Build one lazy (shuffle -> correlate) task pair per shuffle and gather the
# delayed r values in a list. Rebinding `shuffled_ar` inside the loop would
# keep only the final iteration's single-value array and drop every other
# shuffle from the graph, so the results are combined exactly once, after
# the loop, into a single delayed vector.
shuffle_idx = np.arange(0, nshuffles)
cors = []
for shuffle in shuffle_idx:
    rolled = shuffle_data(ar)
    cor = return_pearsonr(rolled, stable_ar)
    cors.append(cor)

# dask.delayed resolves Delayed objects nested inside a list argument, so the
# combine step receives the computed r values and writes cors[i] to index i.
# Nothing has executed yet; call shuffled_ar.compute() to obtain the array.
# NOTE: the graph now holds about 2 * nshuffles + 1 tasks, so rendering it
# is only practical for small nshuffles.
shuffled_ar = combine_shuffled_r_vals(cors, nshuffles, shuffle_idx)

In [None]:
# Render the task graph behind `shuffled_ar`; this only works while the name is
# bound to the dask Delayed object built in the dask cell (a plain NumPy array
# has no .visualize()). NOTE(review): presumably requires graphviz — confirm.
shuffled_ar.visualize()