In [10]:
from sys import getsizeof
import array
from tf.timestamp import Timestamp
# Module-level Timestamp instance; tinf() below calls its indent() and info() methods.
tm = Timestamp()

In [47]:
def tinf(msg, reset=False):
    """Emit ``msg`` through the shared module-level ``tm`` Timestamp object.

    msg   -- text handed to ``tm.info``.
    reset -- when truthy, ``tm.indent(reset=True)`` is called first
             (presumably restarting the elapsed-time clock: the recorded
             outputs show 0.00s right after a reset -- confirm against the
             Timestamp documentation).
    """
    timer = tm
    if reset:
        timer.indent(reset=True)
    timer.info(msg)

def sizes(bunch):
    """Print ``getsizeof`` for every item of ``bunch`` followed by their sum.

    bunch -- an iterable of objects. One 'component' line is printed per item
             and a final 'total' line with the running sum. Note that
             ``getsizeof`` measures each object itself, not what it refers to.
    Returns None.
    """
    row = '{:<10}: {:>10}'
    total = 0
    for part in bunch:
        nbytes = getsizeof(part)
        total += nbytes
        print(row.format('component', nbytes))
    print(row.format('total', total))


def fill():
    """Generate the integer keys used to build the test dictionary.

    The keys come in six consecutive runs, deliberately leaving gaps
    (e.g. 100-199, 300-499, odd numbers around 1000) so that lookups of
    absent keys can be exercised.
    """
    runs = (
        range(100),
        range(200, 300),
        range(500, 1001),
        (1002, 1004, 1006, 1008, 1010, 1012),
        range(2024, 4000, 2),  # same values as 2*i for i in range(1012, 2000)
        range(4000, 1002305),
    )
    for run in runs:
        for key in run:
            yield key

def testcases():
    """Return the keys probed by check(): the integers 1000 through 1010."""
    first, last = 1000, 1010
    return range(first, last + 1)

def check(bunch, getter):
    """Print ``key=value`` (both as repr) for every key from testcases().

    bunch  -- the data structure under test.
    getter -- callable invoked as ``getter(bunch, key)`` to fetch a value.
    """
    for key in testcases():
        found = getter(bunch, key)
        line = '{}={}'.format(repr(key), repr(found))
        print(line)

def times(bunch, getter):
    """Time one million lookups, ``getter(bunch, i)`` for i in 0..999999.

    Progress is reported through tinf(): a 'Getting all keys' message (with
    reset) before the loop and 'Done' after it. Looked-up values are
    discarded.
    """
    lookups = 1000000
    tinf('Getting all keys', reset=True)
    for key in range(lookups):
        getter(bunch, key)
    tinf('Done')

In [50]:
def optimize1(bd):
    """Pack the string values of ``bd`` into one string plus an offset array.

    Values are taken in sorted-key order. The original keys are NOT kept:
    the n-th entry of the offset array belongs to the n-th smallest key.

    Returns ``(offsets, blob)`` where ``offsets`` is an ``array('I')`` holding
    the start position of each value inside ``blob``.
    """
    ordered = [bd[k] for k in sorted(bd)]
    offsets = array.array('I')
    position = 0
    for piece in ordered:
        offsets.append(position)
        position += len(piece)
    return (offsets, ''.join(ordered))

def get1(optD, i):
    """Fetch the i-th value from an ``(offsets, blob)`` pair built by optimize1.

    ``i`` is a position in sorted-key order, not an original key. The value
    ends where the next one starts, or at the end of the blob for the last
    entry. Raises IndexError when ``i`` is beyond the offset array.
    """
    offsets, blob = optD
    start = offsets[i]
    following = i + 1
    if following < len(offsets):
        stop = offsets[following]
    else:
        stop = len(blob)
    return blob[start:stop]

In [51]:
def optimize2(bd):
    """Pack the string values of ``bd`` into a blob with offsets and lengths.

    Values are taken in sorted-key order; the original keys are not kept.

    Returns ``(offsets, lengths, blob)``: ``offsets`` is an ``array('I')`` of
    start positions, ``lengths`` an ``array('H')`` of value lengths (so a
    value longer than the 'H' typecode can hold makes ``append`` raise
    OverflowError), and ``blob`` the concatenation of all values.
    """
    ordered = [bd[k] for k in sorted(bd)]
    offsets = array.array('I')
    lengths = array.array('H')
    position = 0
    for piece in ordered:
        size = len(piece)
        offsets.append(position)
        lengths.append(size)
        position += size
    return (offsets, lengths, ''.join(ordered))

def get2(optD, i):
    """Fetch the i-th value from an ``(offsets, lengths, blob)`` triple.

    ``i`` is a position in sorted-key order (see optimize2), not an original
    key. Raises IndexError when ``i`` is out of range for the arrays.
    """
    offsets, lengths, blob = optD
    start = offsets[i]
    stop = start + lengths[i]
    return blob[start:stop]

In [52]:
def optimize3(bd):
    """Pack ``bd`` into a key->position dict plus offset/length arrays and a blob.

    Unlike the purely positional layouts, the original keys are retained:
    ``keys[k]`` is the rank of ``k`` among the sorted keys, and that rank is
    the position to use in the two arrays.

    Returns ``(keys, valindexes, vallengths, blob)`` where ``valindexes`` is
    an ``array('I')`` of start offsets into ``blob`` and ``vallengths`` an
    ``array('H')`` of value lengths.
    """
    ordered_keys = sorted(bd)
    keys = {}
    pieces = []
    valindexes = array.array('I')
    vallengths = array.array('H')
    offset = 0

    for position, key in enumerate(ordered_keys):
        keys[key] = position
        piece = bd[key]
        size = len(piece)
        pieces.append(piece)
        valindexes.append(offset)
        offset += size
        vallengths.append(size)

    return (keys, valindexes, vallengths, ''.join(pieces))

def get3(optD, i):
    """Look up the value stored for original key ``i`` in an optimize3 layout.

    optD -- the ``(keys, valindexes, vallengths, values)`` tuple returned by
            optimize3: ``keys`` maps each original key to its position, and
            the two arrays are indexed by that position.
    i    -- an original dictionary key.

    Returns the substring of ``values`` belonging to key ``i``.
    Raises KeyError when ``i`` is not a stored key.
    """
    (keys, valindexes, vallengths, values) = optD
    # Translate the key into its position first; the arrays are indexed by
    # position, not by the key itself (using the key there returns the wrong
    # slice or raises IndexError whenever key != position).
    index = keys[i]
    valindex = valindexes[index]
    vallength = vallengths[index]
    return values[valindex:valindex + vallength]

In [54]:
# Baseline: a plain dict mapping every key from fill() to its decimal string.
bunch = {i: str(i) for i in fill()}
sizes((bunch,))
check(bunch, lambda data, i: data.get(i, None))
# times() takes the getter as a required second argument; calling it with
# the dict alone raises TypeError.
times(bunch, lambda data, i: data.get(i, None))

component :   50331744
total     :   50331744
1000='1000'
1001=None
1002='1002'
1003=None
1004='1004'
1005=None
1006='1006'
1007=None
1008='1008'
1009=None
1010='1010'
  0.00s Getting all keys
  0.16s Done


In [55]:
bunch1 = optimize1(bunch)
sizes(bunch1)
check(bunch1, get1)
# timesB is not defined in any cell of this notebook, so a fresh
# Restart & Run All fails here; use the shared times() helper with this
# layout's getter instead.
times(bunch1, get1)

component :    4091932
component :    6445138
total     :   10537070
1000=b'2610'
1001=b'2612'
1002=b'2614'
1003=b'2616'
1004=b'2618'
1005=b'2620'
1006=b'2622'
1007=b'2624'
1008=b'2626'
1009=b'2628'
1010=b'2630'
  0.00s Getting all keys
  1.60s Done


In [17]:
bunch2 = optimize2(bunch)
sizes(bunch2)
check(bunch2, get2)
# timesU is not defined in any cell of this notebook, so a fresh
# Restart & Run All fails here; use the shared times() helper with this
# layout's getter instead.
times(bunch2, get2)

component :    4091932
component :    5888939
total     :    9980871
  0.00s Getting all keys
  0.62s Done


In [18]:
bunch3 = optimize3(bunch)
sizes(bunch3)
# timesU2 is not defined in any cell of this notebook, so a fresh
# Restart & Run All fails here; use the shared times() helper instead.
# get3 raises KeyError for keys that were never stored, and times() probes
# every integer below one million, so absent keys are answered with None.
times(bunch3, lambda data, i: get3(data, i) if i in data[0] else None)

component :    4091932
component :    2045998
component :    5888939
total     :   12026869
  0.00s Getting all keys
  0.43s Done


In [19]:
# Scratch: size reported by getsizeof for a small int object.
x = 1
getsizeof(x)

28

In [24]:
# Scratch: a 100-element array with typecode 'I', measured with getsizeof() in the next cell.
x = array.array('I', range(100))

In [25]:
# Bare expression so the notebook displays the size getsizeof reports for x.
getsizeof(x)

472

In [22]:
# Appends one element to x in place.
# NOTE(review): this cell's execution count (22) is lower than the cells
# above it, so x may not have been the array when this actually ran -- confirm.
x.append(1)