# More efficient broadcast of arrays with memmap

Data movement is where IPython's naïve model suffers the most.
But knowing about your cluster lets you make smarter decisions about data movement than a simple `rc[:].push`.

In [1]:
import socket
import os, sys, re

import numpy as np

import ipyparallel as ipp

In [2]:
#rc = parallel.Client(profile='dirac')
rc = ipp.Client()
eall = rc[:]

In [3]:
engine_hosts = eall.apply_async(socket.gethostname).get_dict()
engine_hosts

{0: 'touchy',
 1: 'touchy',
 2: 'touchy',
 3: 'touchy',
 4: 'touchy',
 5: 'touchy',
 6: 'touchy',
 7: 'touchy',
 8: 'touchy',
 9: 'touchy',
 10: 'touchy',
 11: 'touchy',
 12: 'touchy',
 13: 'touchy',
 14: 'touchy',
 15: 'touchy'}

In [4]:
host_engines = {}

for eid, host in engine_hosts.items():
    if host not in host_engines:
        host_engines[host] = []
    host_engines[host].append(eid)

host_engines

{'touchy': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]}

In [5]:
sz = 1024
data = np.random.random((sz,sz))
data = data.dot(data.T)

In [6]:
%time _ = rc[:].apply_sync(lambda : None)

CPU times: user 43.3 ms, sys: 10.5 ms, total: 53.8 ms
Wall time: 220 ms


In [7]:
%%time
ar = rc[:].push({'data': data}, block=False)
ar.wait_interactive()

_push: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 23.14tasks/s]

CPU times: user 131 ms, sys: 135 ms, total: 266 ms
Wall time: 872 ms





In [8]:
%px import numpy as np

In [9]:
def array_to_file(A):
    """write an array to a temporary file, return its filename"""
    import tempfile
    with tempfile.NamedTemporaryFile(suffix='.np', delete=False) as tf:
        np.save(tf, A)
        data_path = tf.name
    return data_path

In [10]:
@ipp.interactive
def load_memmap(name, path, mode='r+'):
    """load a file on disk into the interactive namespace as a memmapped array"""
    globals()[name] = np.memmap(path, mode=mode)

In [11]:
def bcast_memmap(data, name, client, host_engines):
    """broadcast a numpy array efficiently
    
    - sends data to each remote host only once
    - loads with memmap everywhere
    """

    # actually push the data, just once to each machine

    local_filename = None
    filenames_ars = {}
    for host, engines in host_engines.items():
        h0 = engines[0]
        if host == socket.gethostname():
            # Don't push at all to local engines
            local_filename = array_to_file(data)
        else:
            filenames_ars[host] = rc[h0].apply_async(array_to_file, data)

    # load the data on all engines into a memmapped array
    async_results = []
    for host, engines in host_engines.items():
        if host == socket.gethostname():
            filename = local_filename
        else:
            filename = filenames_ars[host].get()
        ar = rc[engines].apply_async(load_memmap, name, filename)
        async_results.append(ar)
    
    return client.get_result(async_results)

In [12]:
%%time
ar = bcast_memmap(data, 'data', rc, host_engines)
ar.wait_interactive()

unknown: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 93.66tasks/s]

CPU times: user 66.2 ms, sys: 28.6 ms, total: 94.8 ms
Wall time: 268 ms





In [13]:
%px np.linalg.norm(data, 2)

[0;31mOut[0:2]: [0m388468.7182554086

[0;31mOut[1:2]: [0m388468.7182554086

[0;31mOut[2:2]: [0m388468.7182554086

[0;31mOut[3:2]: [0m388468.7182554086

[0;31mOut[4:2]: [0m388468.7182554086

[0;31mOut[5:2]: [0m388468.7182554086

[0;31mOut[6:2]: [0m388468.7182554086

[0;31mOut[7:2]: [0m388468.7182554086

[0;31mOut[8:2]: [0m388468.7182554086

[0;31mOut[9:2]: [0m388468.7182554086

[0;31mOut[10:2]: [0m388468.7182554086

[0;31mOut[11:2]: [0m388468.7182554086

[0;31mOut[12:2]: [0m388468.7182554086

[0;31mOut[13:2]: [0m388468.7182554086

[0;31mOut[14:2]: [0m388468.7182554086

[0;31mOut[15:2]: [0m388468.7182554086

You can also do the same thing [with MPI](MPI%20Broadcast.ipynb).