In [6]:
%%writefile src/check_size_np_structured.py

import numpy as np
from faker import Faker
import random
import sys
from memory_profiler import profile

#@profile
def generate_fake_data(num_records):
    """Build a NumPy structured array filled with synthetic records.

    Each record has two fixed-width 20-byte string fields (``name``, ``city``)
    produced by Faker and three float64 fields (``x``, ``y``, ``z``) drawn
    uniformly from [0.0, 100.0] with the stdlib ``random`` module.

    Parameters
    ----------
    num_records : int
        Number of rows to generate.

    Returns
    -------
    tuple
        ``(data, record_size)`` where ``data`` is the structured array of
        shape ``(num_records,)`` and ``record_size`` is the size of a single
        record in bytes.
    """
    dt = np.dtype([('name', 'S20'),
                   ('city', 'S20'),
                   ('x', 'f8'),
                   ('y', 'f8'),
                   ('z', 'f8')])

    fake = Faker()
    # Ask the dtype for its size instead of re-adding the field widths by
    # hand, so the reported value cannot drift if the layout above changes.
    record_size = dt.itemsize

    data = np.zeros((num_records,), dtype=dt)
    for i in range(num_records):
        # 'S20' fields are fixed width: each string occupies 20 bytes.
        data[i] = (
            fake.name(),
            fake.city(),
            random.uniform(0.0, 100.0),
            random.uniform(0.0, 100.0),
            random.uniform(0.0, 100.0),
        )

    return data, record_size

#@profile
def main():
    """Generate N fake records (N = first CLI argument) and report their size.

    Prints the size of one record in bytes and the total size of the
    generated array in (decimal) megabytes.
    """
    num_records = int(sys.argv[1])
    records, bytes_per_record = generate_fake_data(num_records)
    total_mb = records.nbytes * 1e-6

    print(f"Record size: {bytes_per_record} Bytes")
    print(f"Data size: {total_mb:.3f} MB")

    # Drop the reference explicitly before returning.
    records = None


if __name__ == "__main__":
    main()

Writing src/check_size_np_structured.py


In [7]:
%%bash
python src/check_size_np_structured.py 1000

Record size: 64 Bytes
Data size: 0.064 MB


In [8]:
%%writefile src/check_size_np_array.py

import numpy as np
from faker import Faker
import random
import sys
#from memory_profiler import profile
import psutil


try:
    # `python -m memory_profiler` (and similar profiler launchers) inject
    # `profile` into builtins; reuse it when present.
    profile
except NameError:
    def profile(func):
        """No-op stand-in so the script also runs under plain `python`."""
        return func


@profile
def main():
    """Allocate an (N, 256, 256, 3) float64 array and report memory figures.

    N is read from the first command-line argument. Prints the memory
    currently available according to psutil, the number of megabytes the
    array needs, and the size of the array actually allocated.
    """
    n = int(sys.argv[1])
    shape = (n, 256, 256, 3)
    dtype = np.dtype(np.float64)

    # Derive the requirement from the shape and dtype rather than repeating
    # the literal dimensions and element width.
    required_bytes = int(np.prod(shape, dtype=np.int64)) * dtype.itemsize

    print("free Memory: {:.3f} MB".format(psutil.virtual_memory().available*1e-6))
    print("Required memory: {:.3f} MB".format(required_bytes*1e-6))

    # np.random.random already yields float64; copy=False keeps the dtype
    # guarantee without allocating a second full-size array, which would
    # otherwise double the peak memory footprint.
    data = np.random.random(size=shape).astype(dtype, copy=False)
    print("Data size: {:.3f} MB".format(data.nbytes*1e-6))

    # Drop the reference explicitly so a line-by-line memory profile shows
    # the release.
    data = None


if __name__ == "__main__":
    main()

Writing src/check_size_np_array.py


In [9]:
%%bash
python src/check_size_np_array.py 3000

free Memory: 12726.280 MB
Required memory: 4718.592 MB
Data size: 4718.592 MB


In [102]:
%%bash
# Do not use the bash built-in `time` (it has no --format option); install GNU time first: apt install time
/usr/bin/time --format="Memory used %M Kb" python src/large_np_array.py 3000

free Memory: 14180.233 MB
Required memory: 4718.592 MB
Data size: 4718.592 MB


Memory used 9274164 Kb


In [59]:
!free -h

               total        used        free      shared  buff/cache   available
Mem:            15Gi       1.9Gi        13Gi       3.0Mi       389Mi        13Gi
Swap:          4.0Gi       1.4Gi       2.6Gi
