# More efficient broadcast of arrays with memmap

Data movement is where IPython's naïve model suffers the most.
But knowing about your cluster lets you make smarter decisions about data movement than a simple `rc[:].push`.

In [1]:
import socket
import os, sys, re

import numpy as np

import ipyparallel as ipp

In [2]:
rc = ipp.Client()
eall = rc.broadcast_view(coalescing=True)

In [3]:
engine_hosts = eall.apply_async(socket.gethostname).get_dict()
engine_hosts

{0: 'cb13718df49d',
 1: 'cb13718df49d',
 2: 'cb13718df49d',
 3: 'cb13718df49d',
 4: 'e1faf94f9ba2',
 5: 'cb13718df49d',
 6: 'cb13718df49d',
 7: 'cb13718df49d',
 8: 'cb13718df49d',
 9: 'cb13718df49d',
 10: '6c9ca90ca5d9',
 11: 'cb13718df49d',
 12: 'c2bcf9a92235',
 13: 'cb13718df49d',
 14: 'e1faf94f9ba2',
 15: 'c2bcf9a92235',
 16: 'cb13718df49d',
 17: 'e1faf94f9ba2',
 18: '6c9ca90ca5d9',
 19: 'e1faf94f9ba2',
 20: 'cb13718df49d',
 21: 'cb13718df49d',
 22: 'e1faf94f9ba2',
 23: 'c2bcf9a92235',
 24: 'c2bcf9a92235',
 25: 'cb13718df49d',
 26: 'c2bcf9a92235',
 27: 'e1faf94f9ba2',
 28: '6c9ca90ca5d9',
 29: '6c9ca90ca5d9',
 30: 'e1faf94f9ba2',
 31: 'cb13718df49d',
 32: 'e1faf94f9ba2',
 33: '6c9ca90ca5d9',
 34: '6c9ca90ca5d9',
 35: '6c9ca90ca5d9',
 36: 'e1faf94f9ba2',
 37: 'e1faf94f9ba2',
 38: 'c2bcf9a92235',
 39: 'c2bcf9a92235',
 40: 'e1faf94f9ba2',
 41: 'c2bcf9a92235',
 42: '6c9ca90ca5d9',
 43: 'c2bcf9a92235',
 44: 'c2bcf9a92235',
 45: 'c2bcf9a92235',
 46: 'e1faf94f9ba2',
 47: 'e1faf94f9ba2',
 4

In [4]:
host_engines = {}

for eid, host in engine_hosts.items():
    if host not in host_engines:
        host_engines[host] = []
    host_engines[host].append(eid)

host_engines

{'cb13718df49d': [0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 13, 16, 20, 21, 25, 31],
 'e1faf94f9ba2': [4,
  14,
  17,
  19,
  22,
  27,
  30,
  32,
  36,
  37,
  40,
  46,
  47,
  49,
  53,
  58],
 '6c9ca90ca5d9': [10,
  18,
  28,
  29,
  33,
  34,
  35,
  42,
  48,
  50,
  54,
  56,
  57,
  59,
  60,
  62],
 'c2bcf9a92235': [12,
  15,
  23,
  24,
  26,
  38,
  39,
  41,
  43,
  44,
  45,
  51,
  52,
  55,
  61,
  63]}

In [7]:
%time _ = eall.apply_sync(lambda : None)

CPU times: user 140 ms, sys: 4.23 ms, total: 144 ms
Wall time: 413 ms


In [8]:
import numpy as np
data = np.random.random((512, 512))

In [30]:
%%time
ar = rc[:].push({'data': data}, block=False)
ar.wait_interactive()

_push:  98%|█████████▊| 63/64 [00:02<00:00, 23.82tasks/s]

CPU times: user 679 ms, sys: 429 ms, total: 1.11 s
Wall time: 2.97 s





In [31]:
%px import numpy as np

In [32]:
@ipp.interactive
def array_to_file(A, name):
    """write an array to a temporary file, return its filename"""
    import tempfile
    with tempfile.NamedTemporaryFile(suffix='.np', delete=False) as tf:
        np.save(tf, A)
        data_path = tf.name
    if name:
        globals()[name] = data_path
    return data_path

In [38]:
@ipp.interactive
def load_memmap(name, path, mode='r+'):
    """load a file on disk into the interactive namespace as a memmapped array"""
    globals()[name] = np.memmap(path, mode=mode)

In [41]:
def bcast_memmap(data, name, client, host_engines):
    """broadcast a numpy array efficiently
    
    - sends data to each remote host only once
    - loads with memmap everywhere
    """

    # actually push the data, just once to each machine
    memmap_path_name = f"_bcast_array_{name}"
    
    one_per_host = rc.broadcast_view([engines[0] for engines in host_engines.values()], coalescing=True)
    send_ar = one_per_host.apply_async(array_to_file, data, name=memmap_path_name)
    
    # load the data on all engines into a memmapped array
    async_results = []
    e_all = rc.broadcast_view(coalescing=True)
    return e_all.apply_async(load_memmap, name, ipp.Reference(memmap_path_name))


In [43]:
%%time
ar = bcast_memmap(data, 'data', rc, host_engines)
ar.wait_interactive()

load_memmap: 100%|██████████| 64/64 [00:01<00:00, 38.44tasks/s]

CPU times: user 309 ms, sys: 112 ms, total: 421 ms
Wall time: 1.75 s





In [26]:
%px np.linalg.norm(data, 2)

[0;31mOut[0:2]: [0m217739.80811050607

[0;31mOut[1:2]: [0m217739.80811050607

[0;31mOut[2:2]: [0m217739.80811050607

[0;31mOut[3:2]: [0m217739.80811050607

[0;31mOut[4:2]: [0m217739.80811050607

[0;31mOut[5:2]: [0m217739.80811050607

[0;31mOut[6:2]: [0m217739.80811050607

[0;31mOut[7:2]: [0m217739.80811050607

[0;31mOut[8:2]: [0m217739.80811050607

[0;31mOut[9:2]: [0m217739.80811050607

[0;31mOut[10:2]: [0m217739.80811050607

[0;31mOut[11:2]: [0m217739.80811050607

[0;31mOut[12:2]: [0m217739.80811050607

[0;31mOut[13:2]: [0m217739.80811050607

[0;31mOut[14:2]: [0m217739.80811050607

[0;31mOut[15:2]: [0m217739.80811050607

[0;31mOut[16:2]: [0m217739.80811050607

[0;31mOut[17:2]: [0m217739.80811050607

[0;31mOut[18:2]: [0m217739.80811050607

[0;31mOut[19:2]: [0m217739.80811050607

[0;31mOut[20:2]: [0m217739.80811050607

[0;31mOut[21:2]: [0m217739.80811050607

[0;31mOut[22:2]: [0m217739.80811050607

[0;31mOut[23:2]: [0m217739.80811050607

[0;31mOut[24:2]: [0m217739.80811050607

[0;31mOut[25:2]: [0m217739.80811050607

[0;31mOut[26:2]: [0m217739.80811050607

[0;31mOut[27:2]: [0m217739.80811050607

[0;31mOut[28:2]: [0m217739.80811050607

[0;31mOut[29:2]: [0m217739.80811050607

[0;31mOut[30:2]: [0m217739.80811050607

[0;31mOut[31:2]: [0m217739.80811050607

[0;31mOut[32:2]: [0m217739.80811050607

[0;31mOut[33:2]: [0m217739.80811050607

[0;31mOut[34:2]: [0m217739.80811050607

[0;31mOut[35:2]: [0m217739.80811050607

[0;31mOut[36:2]: [0m217739.80811050607

[0;31mOut[37:2]: [0m217739.80811050607

[0;31mOut[38:2]: [0m217739.80811050607

[0;31mOut[39:2]: [0m217739.80811050607

[0;31mOut[40:2]: [0m217739.80811050607

[0;31mOut[41:2]: [0m217739.80811050607

[0;31mOut[42:2]: [0m217739.80811050607

[0;31mOut[43:2]: [0m217739.80811050607

[0;31mOut[44:2]: [0m217739.80811050607

[0;31mOut[45:2]: [0m217739.80811050607

[0;31mOut[46:2]: [0m217739.80811050607

[0;31mOut[47:2]: [0m217739.80811050607

[0;31mOut[48:2]: [0m217739.80811050607

[0;31mOut[49:2]: [0m217739.80811050607

[0;31mOut[50:2]: [0m217739.80811050607

[0;31mOut[51:2]: [0m217739.80811050607

[0;31mOut[52:2]: [0m217739.80811050607

[0;31mOut[53:2]: [0m217739.80811050607

[0;31mOut[54:2]: [0m217739.80811050607

[0;31mOut[55:2]: [0m217739.80811050607

[0;31mOut[56:2]: [0m217739.80811050607

[0;31mOut[57:2]: [0m217739.80811050607

[0;31mOut[58:2]: [0m217739.80811050607

[0;31mOut[59:2]: [0m217739.80811050607

[0;31mOut[60:2]: [0m217739.80811050607

[0;31mOut[61:2]: [0m217739.80811050607

[0;31mOut[62:2]: [0m217739.80811050607

[0;31mOut[63:2]: [0m217739.80811050607

You can also do the same thing [with MPI](MPI%20Broadcast.ipynb).