In [1]:
import numpy as np
import tensorflow as tf
import os
from math import ceil
from itertools import count

from sketch_io import train_data, valid_data

# Directory layout for stored / retrieved artifacts.
# Every directory string keeps its trailing slash so file names can be appended directly.
main_directory = '../'
checkpoints_directory = '{}checkpts/'.format(main_directory)
features_directory = '{}features/'.format(main_directory)

# Individual feature files inside the features directory.
tr_labels_dir = '{}tr_labels.npy'.format(features_directory)
ev_labels_dir = '{}ev_labels.npy'.format(features_directory)
tr_sketches_dir = '{}tr_sketches.npy'.format(features_directory)
ev_sketches_dir = '{}ev_sketches.npy'.format(features_directory)

# Dataset sizing: how many training samples to keep and how many per batch.
NUM_SAMPLES = 1000 * 1000
BATCH_SIZE = 400
# Number of batches needed to cover NUM_SAMPLES (rounded up).
NUM_TRAIN_BATCHES = ceil(NUM_SAMPLES / BATCH_SIZE)

In [2]:
# One .npy file is kept per column, for both the training ('tr_') and the
# evaluation ('ev_') split. The columns are written with these dtypes:
#   keys     -> np.uint64
#   lengths  -> np.uint16
#   sketches -> np.uint8
#   labels   -> np.uint16
names = ['keys', 'lengths', 'sketches', 'labels']
tr_filenames = ['{}tr_{}.npy'.format(features_directory, name) for name in names]
ev_filenames = ['{}ev_{}.npy'.format(features_directory, name) for name in names]

In [3]:
# ---------------------------------------------------------------------
# Write keys, lengths, sketches & labels to .npy files
# ---------------------------------------------------------------------

# Build the training and validation inputs with identical settings.
# Both readers return five values; the second one is not needed here.
tr_key, _, tr_length, tr_sketch, tr_label = train_data(
    batch_size=BATCH_SIZE,
    epochs=1,
    max_seqlen=300,
)
ev_key, _, ev_length, ev_sketch, ev_label = valid_data(
    batch_size=BATCH_SIZE,
    epochs=1,
    max_seqlen=300,
)

def get_np_arrays(key_op, length_op, sketch_op, label_op, num_to_take=None):
    """Evaluate the four ops batch by batch and gather the results as NumPy arrays.

    A fresh `tf.Session` is opened, global variables and tables are
    initialised, and the four ops are then fetched together repeatedly.
    Collection stops as soon as `num_to_take` samples have been gathered, or
    when the input raises `tf.errors.OutOfRangeError`, whichever comes first.

    Args:
        key_op, length_op, sketch_op, label_op: values that can be fetched with
            `sess.run`. Every fetch must yield arrays whose first axis is the
            sample axis, because batches are concatenated along axis 0.
        num_to_take: optional upper bound on the number of samples returned.
            A falsy value (`None` or `0`) means "read until the input is
            exhausted".

    Returns:
        A single-pass generator yielding four arrays in this order:
        keys (np.uint64), lengths (np.uint16), sketches (np.uint8) and
        labels (np.uint16). Each array is concatenated and cast only when it
        is requested from the generator.
    """
    # Target dtype per column, in the order the arrays are yielded.
    column_dtypes = (np.uint64, np.uint16, np.uint8, np.uint16)
    fetches = [key_op, length_op, sketch_op, label_op]
    columns = ([], [], [], [])  # per-column list of fetched batches
    num_collected = 0           # samples gathered so far (not batches)

    def stacked(limit=None):
        # Lazily join every column's batches, cast, and keep at most `limit` rows.
        return (np.concatenate(batches).astype(dtype)[:limit]
                for (batches, dtype) in zip(columns, column_dtypes))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())

        for i in count():
            if (i+1) % 200 == 0: print("Step {:7d}".format(i+1))
            # Only the fetch itself can signal the end of the input.
            try:
                batch_outputs = sess.run(fetches)
            except tf.errors.OutOfRangeError:
                print("Finished epoch")
                return stacked()

            for batches, batch in zip(columns, batch_outputs):
                batches.append(batch)

            # Count real samples rather than batches: this stops on exactly the
            # batch that reaches the quota, and stays correct when a batch is
            # shorter than expected.
            num_collected += len(batch_outputs[0])
            if num_to_take and num_collected >= num_to_take:  # Stop early?
                return stacked(num_to_take)

def store_to_file(filenames, data):
    """Save each array in `data` to the file name at the same position.

    Args:
        filenames: iterable of destination paths handed to `np.save`.
        data: iterable of arrays (anything with a `.shape` that `np.save`
            accepts), paired positionally with `filenames`. Pairing stops at
            the shorter of the two iterables.
    """
    for destination, array in zip(filenames, data):
        message = "Storing to file {}. Data shape: {}".format(destination, array.shape)
        print(message)
        np.save(destination, array)
        
# Skip the export when every expected feature file is already on disk;
# if even one is missing, all of them are regenerated.
if all(os.path.exists(path) for path in tr_filenames + ev_filenames):
    print("Already parsed the sketches")

else:
    if not os.path.exists(features_directory):
        os.makedirs(features_directory)
    print("Storing sketches to np")
    # Training data is capped at NUM_SAMPLES; evaluation data is read to the end.
    tr_data = get_np_arrays(tr_key, tr_length, tr_sketch, tr_label, num_to_take=NUM_SAMPLES)
    ev_data = get_np_arrays(ev_key, ev_length, ev_sketch, ev_label)

    store_to_file(tr_filenames, tr_data)
    store_to_file(ev_filenames, ev_data)
    print("Finished")

Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
Storing sketches to np
Step     200
Step     400
Step     600
Step     800
Step    1000
Step    1200
Step    1400
Step    1600
Step    1800
Step    2000
Step    2200
Step    2400
Step     200
Step     400
Step     600
Step     800
Step    1000
Step    1200
Step    1400
Step    1600
Step    1800
Step    2000
Step    2200
Step    2400
Step    2600
Step    2800
Step    3000
Step    3200
Step    3400
Step    3600
Step    3800
Step    4000
Step    4200
Finished epoch
Storing to file ../features/tr_keys.npy. Data shape: (1000000,)
Storing to file ../features/tr_lengths.npy. Data shape: (1000000,)
Storing to file ../features/tr_sketches.npy. Data shape: (1000000, 300, 3)
Storing to file ../features/tr_labels.npy. Data shape: (1000000,)
Storing to file ../features/ev_keys.npy. Data shape: (1700000,)
Storing to file ../features/ev_lengths.npy. Data shape: (1700000,)
Storing to file ../features/ev_

In [9]:
# Read the stored arrays back from disk, keyed by column name
# ('keys', 'lengths', 'sketches', 'labels').
tr_data = {name: np.load(tr_file) for (name, tr_file) in zip(names, tr_filenames)}
ev_data = {name: np.load(ev_file) for (name, ev_file) in zip(names, ev_filenames)}

In [13]:
# Sanity check: shape of every training array that was loaded.
for name, data in tr_data.items():
    print(name, data.shape)

labels (1000000,)
lengths (1000000,)
sketches (1000000, 300, 3)
keys (1000000,)


In [14]:
# Sanity check: shape of every evaluation array that was loaded.
for name, data in ev_data.items():
    print(name, data.shape)

labels (1700000,)
lengths (1700000,)
sketches (1700000, 300, 3)
keys (1700000,)
