In [1]:
import numpy as np
import dask.async
import dask.array as da
import os
os.chdir('../..')
import allel

In [2]:
g = allel.GenotypeArray([[[0, 0], [0, 1], [0, 1]],
                         [[0, 1], [1, 1], [1, 1]],
                         [[1, 1], [1, 1], [0, 0]],
                         [[1, 1], [1, 1], [0, 0]],
                         [[1, 1], [0, 1], [-1, -1]]], dtype='i1')
g

Unnamed: 0,0,1,2
0,0/0,0/1,0/1
1,0/1,1/1,1/1
2,1/1,1/1,0/0
3,1/1,1/1,0/0
4,1/1,0/1,./.


In [3]:
# the mapping array is effectively a look-up table, telling how to transform
# integer values in each row
mapping = np.array([[0, 1],  # no transformation
                    [1, 0],  # 0->1, 1->0
                    [0, 0],  # 0->0, 1->0
                    [1, 0],  # 0->1, 1->0
                    [0, 1]], # no transformation
                   dtype='i1')

In [4]:
# the only shape constraint is that size of first dimension must match
assert g.shape[0] == mapping.shape[0]

In [5]:
# this is the pure numpy implementation
expect = g.map_alleles(mapping)
expect

Unnamed: 0,0,1,2
0,0/0,0/1,0/1
1,1/0,0/0,0/0
2,0/0,0/0,0/0
3,0/0,0/0,1/1
4,1/1,0/1,./.


## Dask with 1D chunking

In [6]:
chunks_dim0 = 2

In [7]:
gd1 = da.from_array(g, chunks=(chunks_dim0, None, None))  # N.B., chunk across entire rows
gd1

dask.array<from-ar..., shape=(5, 3, 2), dtype=int8, chunksize=(2, 3, 2)>

In [8]:
md = da.from_array(mapping, chunks=(chunks_dim0, None))  # N.B., first dimension chunk size matches gd1
md

dask.array<from-ar..., shape=(5, 2), dtype=int8, chunksize=(2, 2)>

In [9]:
def f(block, bmapping):
    return allel.GenotypeArray(block).map_alleles(bmapping[:, 0, :])

In [10]:
gmapped1 = da.map_blocks(f, gd1, md[:, None, :])
gmapped1

dask.array<atop-ac..., shape=(5, 3, 2), dtype=None, chunksize=(2, 3, 2)>

In [11]:
allel.GenotypeArray(gmapped1.compute())

Unnamed: 0,0,1,2
0,0/0,0/1,0/1
1,1/0,0/0,0/0
2,0/0,0/0,0/0
3,0/0,0/0,1/1
4,1/1,0/1,./.


In [12]:
np.array_equal(expect, gmapped1.compute())

True

## Dask with 2D chunking

In [13]:
chunks_dim1 = 2

In [14]:
gd2 = da.from_array(g, chunks=(chunks_dim0, chunks_dim1, None))
gd2

dask.array<from-ar..., shape=(5, 3, 2), dtype=int8, chunksize=(2, 2, 2)>

In [15]:
gmapped2 = da.map_blocks(f, gd2, md[:, None, :])
gmapped2

dask.array<atop-2e..., shape=(5, 3, 2), dtype=None, chunksize=(2, 2, 2)>

In [16]:
np.array_equal(expect, gmapped2.compute())

True

It works!