In [10]:
import matplotlib.pyplot as plt
import numpy as np
import mymodels
import tensorflow as tf

In [11]:
# Project-relative locations. Every folder path keeps its trailing slash so
# that file names can be appended to it directly.
main_folder = '../'
dataset_folder = f'{main_folder}dataset_tfrecord_small/'
dataset2_folder = f'{main_folder}dataset2/'
logs_folder = f'{main_folder}logs/'
checkpoints_folder = f'{main_folder}checkpoints_sect1/'

In [12]:
def get_tensor(dataset):
    """Materialise every element of ``dataset`` into one NumPy array.

    Each element yielded by ``dataset.take(-1)`` is converted with
    ``np.array`` and the converted elements are stacked along a new leading
    axis. (For a ``tf.data.Dataset`` a count of -1 selects all elements.)

    Args:
        dataset: Object exposing ``take(count)`` that returns an iterable.

    Returns:
        ``np.ndarray`` built from the list of converted elements.
    """
    elements = [np.array(element) for element in dataset.take(-1)]
    return np.array(elements)

def load_tfrecord(filename, dataset_type=tf.int32):
    """Open a TFRecord file whose records are serialized tensors.

    Args:
        filename: Path of the TFRecord file to read.
        dataset_type: dtype handed to ``tf.io.parse_tensor`` for every record.

    Returns:
        The ``tf.data.TFRecordDataset`` for ``filename`` mapped through
        ``tf.io.parse_tensor``.
    """
    def _decode(serialized):
        # Turn one serialized record back into a tensor of the requested dtype.
        return tf.io.parse_tensor(serialized, dataset_type)

    records = tf.data.TFRecordDataset(filename)
    return records.map(_decode)
    
def make_tf_dataset(X, y):
    """Zip two datasets into (X, y) pairs and enable prefetching.

    Args:
        X: Dataset providing the first component of each pair.
        y: Dataset providing the second component of each pair.

    Returns:
        The zipped dataset with ``prefetch(buffer_size=tf.data.AUTOTUNE)``
        applied.
    """
    return tf.data.Dataset.zip((X, y)).prefetch(buffer_size=tf.data.AUTOTUNE)

In [13]:
# Training split: image records are parsed as tf.double, label records as tf.int64.
X = load_tfrecord(f'{dataset_folder}train_image.tfrecord', dataset_type=tf.double)
y = load_tfrecord(f'{dataset_folder}train_label.tfrecord', dataset_type=tf.int64)
train_dataset = make_tf_dataset(X, y)

# Test split, parsed the same way and exposed as the validation dataset.
X_test = load_tfrecord(f'{dataset_folder}test_image.tfrecord', dataset_type=tf.double)
y_test = load_tfrecord(f'{dataset_folder}test_label.tfrecord', dataset_type=tf.int64)
val_dataset = make_tf_dataset(X_test, y_test)

In [14]:
# Peek at a single element to confirm the (images, labels) shapes.
# Dedicated loop names are used so the module-level label dataset `y` is not
# overwritten by a tensor; `take(1)` already limits the loop to one pass.
for images_batch, labels_batch in train_dataset.take(1):
    print(images_batch.shape, labels_batch.shape)

(128, 128, 128, 1) (128, 10)


In [15]:
# Build the section-1 network and restore the weights saved in the
# 'sect1_epoch_100' checkpoint file.
weights = f'{checkpoints_folder}sect1_epoch_100.weights.h5'
model = mymodels.sect1()
model.compile()  # compiled with default arguments before the weights are loaded
model.load_weights(weights)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 126, 126, 32)      320       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 63, 63, 32)       0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 30, 30, 64)       0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 57600)             0         
                                                                 
 dense_3 (Dense)             (None, 128)              

In [16]:
def crop_images(X_train_canvas, batch, crop_amount=32, grace=5, predictor=None):
    """Crop a square window around the predicted location in every image.

    The predictor is run once on the whole array. The first two values of each
    prediction are truncated to ints and used as the column (x) and row (y) of
    the crop origin. The origin is clamped so that the window, padded by
    ``grace`` pixels on every side, stays inside the 128x128 canvas.

    Args:
        X_train_canvas: NumPy array of N images that can be reshaped to
            (N, 128, 128, 1).
        batch: Batch identifier; only used in the progress message.
        crop_amount: Side length of the crop before padding.
        grace: Padding added on each side of the crop.
        predictor: Object with a ``predict`` method. When omitted, the
            notebook-level ``model`` is used.

    Returns:
        NumPy array of the cropped images. For (128, 128, 1) inputs each crop
        has shape (crop_amount + 2 * grace, crop_amount + 2 * grace, 1). An
        empty array is returned when there are no images.
    """
    image_size = 128  # spatial size the images are reshaped to for prediction
    window = crop_amount + 2 * grace
    expected_shape = (window, window, 1)
    lowest = grace
    highest = image_size - crop_amount - grace

    num_images = X_train_canvas.shape[0]
    if num_images == 0:
        return np.array([])

    if predictor is None:
        predictor = model  # notebook-level model; must exist when no predictor is given

    # A single predict call for the whole array instead of one call per image.
    coords = predictor.predict(
        X_train_canvas.reshape(num_images, image_size, image_size, 1)
    )

    X_cropped = []
    for i in range(num_images):
        image = X_train_canvas[i]

        # Upper bound is applied first, then the lower bound.
        x = max(min(int(coords[i][0]), highest), lowest)
        y = max(min(int(coords[i][1]), highest), lowest)

        cropped_image = image[y-grace:y+crop_amount+grace, x-grace:x+crop_amount+grace]

        if cropped_image.shape != expected_shape:
            # Diagnostic only: show the offending crop and its source image.
            # The crop is still appended below.
            print(f"Error: {cropped_image.shape}")
            print(x,y)
            plt.imshow(cropped_image)
            plt.show()
            plt.imshow(image)
            plt.show()
        X_cropped.append(cropped_image)
        print(f"\rNum: {i+1} / {num_images} Batch: {batch} ", end='')

    return np.array(X_cropped)

In [17]:
def _bytes_feature(value):
  """Returns a bytes_list Feature holding a single string / bytes value.

  If `value` has the same type as `tf.constant(0)`, it is first unwrapped with
  `.numpy()`, because BytesList won't unpack a string from an EagerTensor.
  """
  eager_tensor_type = type(tf.constant(0))
  payload = value.numpy() if isinstance(value, eager_tensor_type) else value
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[payload]))


def _int64_feature(value):
    """Returns an int64_list Feature from an array (or scalar) of integers.

    The input is converted with ``np.asarray`` and flattened to 1-D; every
    element becomes one entry of the Int64List. Shape information is not
    stored, so callers that need it must keep it separately.
    """
    # Int64List needs a flat sequence of ints. Wrapping the flattened array in
    # a list would pass one ndarray element instead of its values.
    flat_values = np.asarray(value).flatten().tolist()
    return tf.train.Feature(int64_list=tf.train.Int64List(value=flat_values))




def example_test(image, label):
    """Wrap an image/label pair into a ``tf.train.Example``.

    Both values are stored as bytes features, under the keys 'image' and
    'label' respectively.
    """
    features = tf.train.Features(feature={
        'image': _bytes_feature(image),
        'label': _bytes_feature(label),
    })
    return tf.train.Example(features=features)

def write_in_batches(data, filename, batch_size=100):
    """Serialize every (images, labels) element of ``data`` into a TFRecord file.

    Each dataset element becomes one ``tf.train.Example``: the images are
    stored as a serialized tensor, and the labels are reduced with ``argmax``
    over their last axis before being serialized.

    Args:
        data: Dataset exposing ``take`` whose elements are (images, labels).
        filename: Destination path of the TFRecord file.
        batch_size: Not used by the current implementation; elements are
            written exactly as the dataset yields them.
    """
    with tf.io.TFRecordWriter(filename) as writer:
        for images, labels in data.take(-1):
            # NOTE: images are written as-is; crop_images is not applied here.
            image_bytes = tf.io.serialize_tensor(np.array(images)).numpy()

            # Collapse the last label axis to the index of its maximum.
            class_indices = np.argmax(np.array(labels), axis=-1)
            label_bytes = tf.io.serialize_tensor(class_indices).numpy()

            record = example_test(image_bytes, label_bytes)
            writer.write(record.SerializeToString())

In [18]:
# Export both splits as TFRecord files; the relative names resolve against
# the current working directory.
# NOTE(review): the file names say "cropped", but write_in_batches does not
# currently apply crop_images to the images — confirm this is intended.
write_in_batches(train_dataset, 'train_dataset_cropped.tfrecord')
write_in_batches(val_dataset, 'test_dataset_cropped.tfrecord')