# Experiments putting large numpy arrays into key-value stores


I want something that is fast (kept in RAM when possible) but can also persist to disk when necessary, e.g. when there is not enough RAM.
It should also handle large image/volume data.

## Partd

See https://github.com/dask/partd


## Redis 

https://stackoverflow.com/questions/55311399/fastest-way-to-store-a-numpy-array-in-redis

## Other key-value stores to try

* KVstore in Mxnet https://mxnet.incubator.apache.org/versions/master/tutorials/python/kvstore.html

# TODO

* try distributed between processes/machines
* try with redis



In [4]:
import partd
import numpy as np

# This is adapted from the Redis example in the Stack Overflow answer linked above.
# It assumes 2D arrays only. An nd-array would have to be packed slightly differently,
# e.g. len(shape) as the first byte/short, followed by the shape and then a.tobytes().


def toPartd(p,a,n):
    """Store the 2-D Numpy array 'a' in the partd store 'p' under key 'n'.

    The stored value is an 8-byte header (height and width packed as two
    big-endian unsigned 32-bit integers, format '>II') followed by the raw
    bytes from ``a.tobytes()``.  The dtype is NOT recorded in the payload,
    so whoever reads the value back has to know it.

    Parameters:
        p: object with an ``append(dict)`` method, e.g. ``partd.Dict()``.
        a: 2-D Numpy array to store.
        n: key to store the encoded array under.

    Returns:
        None.

    Raises:
        ValueError: if 'a' is not 2-dimensional.
    """
    # Local import: the notebook cell defining this helper only imports
    # partd and numpy, so a module-level 'struct' may not exist yet.
    import struct

    if len(a.shape) != 2:
        raise ValueError(
            "toPartd only supports 2-D arrays, got shape %r" % (tuple(a.shape),)
        )

    h, w = a.shape
    header = struct.pack('>II', h, w)

    # Store encoded data in partd.
    # NOTE(review): per the partd README, append() adds to whatever is already
    # stored under the key, so storing twice under the same key presumably
    # leaves two concatenated payloads -- confirm before reusing keys.
    p.append({n: header + a.tobytes()})
    return

def fromPartd(p,n,dtype=np.uint16):
    """Retrieve a 2-D Numpy array from the partd store 'p' under key 'n'.

    Expects the value to be an 8-byte header (height and width as two
    big-endian unsigned 32-bit integers, format '>II') followed by the raw
    array bytes.

    Parameters:
        p: object with a ``get(key)`` method returning the stored bytes,
           e.g. ``partd.Dict()``.
        n: key the array was stored under.
        dtype: element type of the stored array.  The payload does not
           record it, so it must match the dtype of the array that was
           stored; defaults to ``np.uint16``.

    Returns:
        Array of shape (height, width) and the given dtype.  It is built
        with ``np.frombuffer``, i.e. it is a view onto the retrieved bytes
        rather than a copy (read-only when those bytes are an immutable
        ``bytes`` object -- copy it before modifying).
    """
    # Local import: the notebook cell defining this helper only imports
    # partd and numpy, so a module-level 'struct' may not exist yet.
    import struct

    encoded = p.get(n)
    # Read the header in place instead of slicing off a copy first.
    h, w = struct.unpack_from('>II', encoded)
    # offset=8 skips the header; the remainder is the element data.
    return np.frombuffer(encoded, dtype=dtype, offset=8).reshape(h, w)


In [5]:
p = partd.Dict()


In [7]:
import numpy as np

In [8]:
# Create 80x80 numpy array to store
a0 = np.arange(6400,dtype=np.uint16).reshape(80,80) 


In [9]:
toPartd(p, a0, "a0")

In [10]:
fromPartd(p, "a0")

array([[   0,    1,    2, ...,   77,   78,   79],
       [  80,   81,   82, ...,  157,  158,  159],
       [ 160,  161,  162, ...,  237,  238,  239],
       ...,
       [6160, 6161, 6162, ..., 6237, 6238, 6239],
       [6240, 6241, 6242, ..., 6317, 6318, 6319],
       [6320, 6321, 6322, ..., 6397, 6398, 6399]], dtype=uint16)

Seems to work in principle. Try with something larger...

In [12]:
p.data.keys()

dict_keys(['a0'])

In [13]:
# 5000x5000 test array. NOTE: 5000*5000 = 25,000,000 is far beyond the uint16
# maximum (65535), so the values are not a unique ramp: the output recorded
# below shows them wrapped modulo 65536 (the last row starts at 25784, not
# at 4999*5000 = 24995000).
a1 = np.arange(5000*5000,dtype=np.uint16).reshape(5000,5000) 

In [14]:
toPartd(p,a1, "a1")

In [15]:
fromPartd(p, "a1")

array([[    0,     1,     2, ...,  4997,  4998,  4999],
       [ 5000,  5001,  5002, ...,  9997,  9998,  9999],
       [10000, 10001, 10002, ..., 14997, 14998, 14999],
       ...,
       [15784, 15785, 15786, ..., 20781, 20782, 20783],
       [20784, 20785, 20786, ..., 25781, 25782, 25783],
       [25784, 25785, 25786, ..., 30781, 30782, 30783]], dtype=uint16)

In [2]:
# Redis example from stackoverflow answer



In [3]:

import struct
import redis
import numpy as np

def toRedis(r,a,n):
    """Write the 2-D Numpy array 'a' to Redis under key 'n'.

    The value written is the array's height and width packed as two
    big-endian unsigned 32-bit integers ('>II'), immediately followed by
    the raw bytes of ``a.tobytes()``.  The dtype is not part of the payload.

    Parameters:
        r: client object with a ``set(key, value)`` method,
           e.g. ``redis.Redis(...)``.
        a: 2-D Numpy array to store.
        n: Redis key.

    Returns:
        None.
    """
    rows, cols = a.shape
    payload = struct.pack('>II', rows, cols) + a.tobytes()

    # Hand the header + data blob to Redis in a single SET
    r.set(n, payload)

def fromRedis(r,n,dtype=np.uint16):
    """Retrieve a 2-D Numpy array from Redis key 'n'.

    Expects the value to be an 8-byte header (height and width as two
    big-endian unsigned 32-bit integers, format '>II') followed by the raw
    array bytes.

    Parameters:
        r: client object with a ``get(key)`` method, e.g. ``redis.Redis(...)``.
        n: Redis key the array was stored under.
        dtype: element type of the stored array.  The payload does not
           record it, so it must match the dtype of the array that was
           stored; defaults to ``np.uint16``.

    Returns:
        Array of shape (height, width) and the given dtype.  It is built
        with ``np.frombuffer``, i.e. it is a view onto the retrieved bytes
        rather than a copy (read-only when those bytes are an immutable
        ``bytes`` object -- copy it before modifying).

    Raises:
        KeyError: if ``r.get(n)`` returns None, i.e. nothing is stored
           under 'n'.
    """
    encoded = r.get(n)
    if encoded is None:
        # Fail with the missing key instead of an opaque TypeError from
        # trying to decode None.
        raise KeyError(n)

    # Read the header in place instead of slicing off a copy first.
    h, w = struct.unpack_from('>II', encoded)
    # offset=8 skips the header; the remainder is the element data.
    return np.frombuffer(encoded, dtype=dtype, offset=8).reshape(h, w)

# 80x80 uint16 test array holding the values 0..6399
a0 = np.arange(80 * 80, dtype=np.uint16).reshape(80, 80)

# Client for the Redis server on localhost, default port, database 0
r = redis.Redis(db=0, port=6379, host='localhost')

# Write a0 to Redis under the key 'a0array'
toRedis(r, a0, 'a0array')

# Read it back from the same key (note: this rebinds the name a1)
a1 = fromRedis(r, 'a0array')

ModuleNotFoundError: No module named 'redis'