In [5]:
import pprint
import tempfile
import numpy as np
import pescador
import time
import os

# Benchmark grid: every row count is combined with every sample shape.
n_samples = [
    100,
    1000,
    10000,
    20000,
    50000,
]
# The leading 1 in each shape is a placeholder axis; create_npy overwrites
# it with the row count when it builds the file.
sample_shapes = [
    (1, 10),
    (1, 10, 10),
    (1, 6, 26, 26),
]

# Bound on how many draws each timing loop makes.
n_to_test = 1000
# Buffer size handed to pescador.BufferedStreamer.
buffer_size = 128

In [2]:
def create_npy(n_samples, sample_shapes, filename):
    """Save an array of random values to ``filename`` with ``np.save``.

    Parameters
    ----------
    n_samples : int
        Length of the first axis of the saved array; it replaces the first
        entry of ``sample_shapes``.
    sample_shapes : sequence of int
        Shape of the array with a placeholder in position 0, e.g.
        ``(1, 10, 10)``.  The sequence is copied, so the caller's object
        is left untouched.
    filename : str
        Destination passed straight to ``np.save``.
    """
    full_shape = [dim for dim in sample_shapes]
    # Swap the placeholder leading axis for the requested number of rows.
    full_shape[0] = n_samples

    np.save(filename, np.random.random(full_shape))
    
def npy_gen(filename, mmap='r'):
    """Endlessly yield randomly chosen rows of a ``.npy`` file.

    The file is opened on the first ``next()`` call, not when the generator
    object is created, and the generator never terminates on its own.

    Parameters
    ----------
    filename : str
        Path passed to ``np.load``.
    mmap : str or None
        Forwarded to ``np.load`` as ``mmap_mode``; defaults to ``'r'``.

    Yields
    ------
    dict
        ``{'X': row}`` where ``row`` is ``np.array`` applied to the entry at
        an index drawn with ``np.random.randint(len(data))``.
    """
    source = np.load(filename, mmap_mode=mmap)

    while True:
        row = np.random.randint(len(source))
        # np.array(...) turns the selected entry into a standalone array
        # before it is handed to the consumer.
        sample = np.array(source[row])
        yield {'X': sample}

In [6]:
def _mean_sample_time(iterable, max_items=None):
    """Return the mean number of seconds spent producing each item of ``iterable``.

    Parameters
    ----------
    iterable : iterable
        Source to draw from.  It may be infinite when ``max_items`` is given.
    max_items : int or None
        Stop after exactly this many items have been timed.  ``None`` means
        run until ``iterable`` is exhausted.

    Returns
    -------
    float
        ``np.mean`` of the per-item durations.
    """
    sample_times = []
    # perf_counter is monotonic and high-resolution, which matters for the
    # microsecond-scale intervals measured here.
    start = time.perf_counter()
    for count, _ in enumerate(iterable, 1):
        sample_times.append(time.perf_counter() - start)
        # Check the limit before restarting the clock so exactly max_items
        # durations are recorded.
        if max_items is not None and count >= max_items:
            break
        start = time.perf_counter()
    return np.mean(sample_times)


# Result tables, all keyed by (number of rows, sample shape).
create_times = {}
generator_sample_times = {}
stream_sample_times = {}
batch_sample_times = {}
with tempfile.TemporaryDirectory() as tmpdir:
    for n in n_samples:
        for s in sample_shapes:
            key = (n, s)
            filename = "random_{}_{}.npy".format(n, "-".join(str(x) for x in s))
            filepath = os.path.join(tmpdir, filename)

            # Create the test file
            c0 = time.perf_counter()
            create_npy(n, s, filepath)
            create_times[key] = time.perf_counter() - c0
            print(filename, "created in", "{}s".format(create_times[key]))

            # Sample from it with no streamer.  The raw generator is infinite,
            # so cap it at n_to_test draws to match the streamer runs below.
            generator_sample_times[key] = _mean_sample_time(
                npy_gen(filepath), max_items=n_to_test)
            print(filename, "generator sampling average:", "{:.7f}s".format(generator_sample_times[key]))

            # Sample from it with a basic streamer
            streamer = pescador.Streamer(npy_gen(filepath))
            stream_sample_times[key] = _mean_sample_time(
                streamer.generate(max_batches=n_to_test))
            print(filename, "streamer sampling average:", "{:.7f}s".format(stream_sample_times[key]))

            # Sample from it with a batch streamer; each timed item is one
            # buffer of buffer_size samples, hence the per-sample division.
            buffstream = pescador.BufferedStreamer(streamer, buffer_size)
            batch_sample_times[key] = _mean_sample_time(
                buffstream.generate(max_batches=n_to_test))
            print(filename, "buffered sampling average:", "{:.7f}s".format(batch_sample_times[key]))
            print(filename, "buffered sampling per sample average:", "{:.7f}s".format(batch_sample_times[key] / buffer_size))

random_100_1-10.npy created in 0.0006449222564697266s
random_100_1-10.npy generator sampling average: 0.0000258s
random_100_1-10.npy streamer sampling average: 0.0000085s
random_100_1-10.npy buffered sampling average: 0.0001491s
random_100_1-10.npy buffered sampling per sample average: 0.0000012s
random_100_1-10-10.npy created in 0.0012819766998291016s
random_100_1-10-10.npy generator sampling average: 0.0000280s
random_100_1-10-10.npy streamer sampling average: 0.0000100s
random_100_1-10-10.npy buffered sampling average: 0.0001468s
random_100_1-10-10.npy buffered sampling per sample average: 0.0000011s
random_100_1-6-26-26.npy created in 0.020808935165405273s
random_100_1-6-26-26.npy generator sampling average: 0.0000350s
random_100_1-6-26-26.npy streamer sampling average: 0.0000130s
random_100_1-6-26-26.npy buffered sampling average: 0.0004777s
random_100_1-6-26-26.npy buffered sampling per sample average: 0.0000037s
random_1000_1-10.npy created in 0.0006899833679199219s
random_1000_

In [None]:
# Pretty-print each timing table; all are keyed by (row count, sample shape).
for timing_table in (
        create_times,
        generator_sample_times,
        stream_sample_times,
        batch_sample_times,
):
    pprint.pprint(timing_table)