In [1]:
from bisect import bisect_left
from timeit import timeit
from tf.app import use
from pack import deepSize

In [2]:
# Load the corpus through the `use` function of tf.app; the resulting handle `A`
# exposes the feature API (`A.api.F`) that the following cells read from.
# NOTE(review): 'bhsa:clone' and silent='deep' presumably select a local clone of
# the corpus and suppress loading messages — confirm against the tf.app docs.
A = use('bhsa:clone', silent='deep')

In [3]:
# Take the `nametype` feature from the feature API and keep a handle on its
# underlying `data` mapping, which later cells iterate and index like a dict.
F = A.api.F
nametype = F.nametype
data = nametype.data
# Report the size of the mapping as measured by deepSize (presumably in bytes).
print(deepSize(data))

2380461


In [4]:
# Split the mapping into two parallel lists: the sorted keys and, at the same
# positions, the values that belong to them. Building them with sorted() and a
# comprehension avoids leaving a stray loop variable in the notebook namespace.
indices = sorted(data)
values = [data[index] for index in indices]

# Peek at the first entries of both lists.
print(indices[0:10])
print(values[0:10])

# Size of each list, for comparison with the size of the original mapping.
print(deepSize(indices))
print(deepSize(values))

[740, 765, 793, 817, 821, 834, 867, 885, 886, 893]
['pers', 'pers', 'pers', 'pers', 'topo', 'pers', 'topo', 'topo', 'ppde', 'topo']
1390248
321597


Given a sorted list of the valid indices and a parallel list of their values, we can look up any value by means of `bisect`.

The get function is surprisingly simple and quite fast.

In [5]:
def testPerformanceTf1():
    """Time value lookups through the `v` method of the `nametype` feature.

    Three measurements are taken with `timeit`:

    1. one million calls of `v(740)`;
    2. one million calls of `v(739)`;
    3. ten thousand sweeps over every key in `range(700, 1700)`.

    In the data shown in this notebook 740 is the smallest key, so the first
    measurement looks up an existing key and the second an absent one.

    Returns
    -------
    tuple
        The three timings, in the order listed above.
    """
    v = nametype.v
    repeats = 1000000

    def w():
        # One sweep over a fixed block of 1000 consecutive keys.
        for node in range(700, 1700):
            _ = v(node)

    timings = []
    # The timed statement reads `v` and `key` from this function's locals.
    for key in (740, 739):
        timings.append(timeit("v(key)", globals=locals(), number=repeats))
    timings.append(timeit("w()", globals=locals(), number=10000))
    return tuple(timings)

# Run the benchmark; the cell output shows the three timings it returns.
testPerformanceTf1()

(0.25589227000000037, 0.15479197999999883, 1.7032816670000024)

In [6]:
def testPerformanceBs():
    times = 1000000
    inds = indices
    vals = values
    
    def v(i):
        j = bisect_left(inds, i)
        k = inds[j]
        return vals[j] if k == i else None
    
    def w():
        for i in range(700, 1700):
            x = v(i)
            
    key = 740
    t1 = timeit("v(key)", globals=locals(), number=times)
    key = 739
    t2 = timeit("v(key)", globals=locals(), number=times)
    t3 = timeit("w()", globals=locals(), number=10000)
    return (t1, t2, t3)

# Run the benchmark; the cell output shows the three timings it returns.
testPerformanceBs()

(0.47409529600000155, 0.4525280639999991, 4.312796224000003)

# Observation

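The bisect-based lookup is slower by a factor of roughly **2 to 3** in the timings above (about 1.9× for a hit, 2.9× for a miss and 2.5× for the range scan), but no more memory is used (rather a bit less).
Moreover, instead of a dict we have two lists, which we can manage in a separate process by means of `ShareableList`.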


In [7]:
from multiprocessing import shared_memory


def shareList(items, name):
    """Put `items` in a ShareableList backed by the shared memory block `name`.

    Creating a named block fails with FileExistsError when a block of that
    name already exists, e.g. after this cell has been run before. In that
    case the stale block is unlinked and the list is created afresh, so the
    cell can be re-run.

    NOTE(review): this replaces *any* existing block with that name — make
    sure the names used here are not claimed by an unrelated program.
    """
    try:
        return shared_memory.ShareableList(items, name=name)
    except FileExistsError:
        stale = shared_memory.SharedMemory(name=name)
        stale.close()
        stale.unlink()
        return shared_memory.ShareableList(items, name=name)


# Other processes can attach to these lists by the names "inds" and "vals".
indicesM = shareList(indices, "inds")
valuesM = shareList(values, "vals")