In [1]:
import os
from classification_models.tfkeras import Classifiers

# Look up the 'resnet34' entry in the classification_models registry and
# unpack the returned pair: ResNet34 is called further down to build the
# backbone model; preprocess_input is presumably the matching input
# preprocessing callable.
# NOTE(review): preprocess_input is not referenced in any cell visible here --
# confirm whether images are meant to pass through it before training.
ResNet34, preprocess_input = Classifiers.get('resnet34')

In [2]:
# Root directory for TensorBoard logs; created up front so that TensorBoard
# can be pointed at it before any training run has written into it.
log_dir = "logs/"
os.makedirs(log_dir, exist_ok=True)

# Load (or reload) the TensorBoard notebook extension and start it on log_dir.
# NOTE(review): the port is pinned to 6006, so the launch fails whenever another
# process already holds that port (see the recorded output of this cell).
%reload_ext tensorboard
%tensorboard --logdir {log_dir} --reload_multifile True --host 0.0.0.0 --port 6006

ERROR: Failed to launch TensorBoard (exited with 255).
Contents of stderr:
E0714 08:36:20.467581 140201875167040 program.py:311] TensorBoard could not bind to port 6006, it was already in use
ERROR: TensorBoard could not bind to port 6006, it was already in use

In [3]:
# Run only for training!
import glob
import math
import datetime
import numpy as np
import tensorflow as tf

# Training configuration -- adjust these values to match the local setup.
data_dir = '../data/'  # root folder of the input data; change as needed
# sizey/sizex are used as (height, width) of the model input;
# sizez is presumably the channel count (not referenced in the cells shown).
sizex, sizey, sizez = 500, 500, 3
batch_size, nb_epochs = 32, 50
validation_ratio = 0.3  # fraction of the samples held out for validation

In [4]:
# Collect every dataset file under data_dir.
# glob returns matches in arbitrary (filesystem-dependent) order, so the list
# is sorted to make the later numpy_datasets[:4] selection reproducible.
# os.path.join also makes this independent of a trailing slash in data_dir.
numpy_datasets = sorted(glob.glob(os.path.join(data_dir, 'dataset*')))

In [5]:
# Load the first four dataset files and stack each field along axis 0.
#
# The per-file arrays are gathered in lists and concatenated once at the end:
#   * no try/except is involved, so I/O errors and missing keys surface
#     immediately instead of being mistaken for "first iteration";
#   * re-running the cell rebuilds the arrays from scratch rather than
#     appending to whatever x_dust/x_nodust/y/ids already hold in the kernel;
#   * every array is copied once instead of once per subsequent file.
x_dust_parts, x_nodust_parts, y_parts, ids_parts = [], [], [], []
for dataset in numpy_datasets[:4]:
    with np.load(dataset) as data:
        x_dust_parts.append(data['x_dust'])
        x_nodust_parts.append(data['x_nodust'])
        y_parts.append(data['y'])
        ids_parts.append(data['id'])

if not x_dust_parts:
    # Fail with an explicit message instead of a NameError further down.
    raise ValueError("numpy_datasets is empty: no dataset files to load")

x_dust   = np.concatenate(x_dust_parts, axis=0)
x_nodust = np.concatenate(x_nodust_parts, axis=0)
y        = np.concatenate(y_parts, axis=0)
ids      = np.concatenate(ids_parts, axis=0)

In [6]:
# Build data_dict: one stacked array per field, combined over the first four
# dataset files. Note the naming difference: the files store the identifiers
# under 'id', while data_dict exposes them as 'ids'.
_npz_key_for = {'x_dust': 'x_dust', 'x_nodust': 'x_nodust', 'y': 'y', 'ids': 'id'}

# Gather the per-file arrays first and concatenate once per field, so each
# array is copied a single time instead of once for every later file.
_chunks = {field: [] for field in _npz_key_for}
for dataset in numpy_datasets[:4]:
    with np.load(dataset) as data:
        for field, npz_key in _npz_key_for.items():
            _chunks[field].append(data[npz_key])

# With no input files the dict stays empty.
data_dict = {
    field: np.concatenate(parts, axis=0)
    for field, parts in _chunks.items()
    if parts
}

In [7]:
# Sanity check: shape of the stacked x_dust array.
np.shape(x_dust)

(100, 500, 500, 3)

In [8]:
# Sanity check: shape of the x_dust entry held in data_dict.
np.shape(data_dict['x_dust'])

(100, 500, 500, 3)

In [9]:
# Build a dataset that yields one dict per sample by slicing every array in
# data_dict along its first axis.
dataset = tf.data.Dataset.from_tensor_slices(data_dict)
# Show the dataset that was just built. Calling from_tensor_slices a second
# time only to display its repr would convert all arrays to tensors again.
dataset

<TensorSliceDataset shapes: {x_dust: (500, 500, 3), x_nodust: (500, 500, 3), y: (3,), ids: ()}, types: {x_dust: tf.float64, x_nodust: tf.float64, y: tf.float64, ids: tf.string}>

In [None]:
# Group consecutive samples into batches of batch_size.
# Caution: this rebinds `dataset`, so running the cell twice batches the batches.
dataset = dataset.batch(batch_size=batch_size)

In [3]:
# Optional debugging switches: uncomment to limit TensorFlow to a single
# inter-op and a single intra-op thread.
#tf.config.threading.set_inter_op_parallelism_threads(1)
#tf.config.threading.set_intra_op_parallelism_threads(1)
# Shorthand for tf.data's AUTOTUNE constant; passed below as num_parallel_calls
# for the map stages and as the prefetch buffer size.
AUTO = tf.data.experimental.AUTOTUNE

def parse_images(image_path):
    """Load one PNG file and derive its label from the parent directory name.

    Args:
        image_path: Path of a PNG file. The name of the directory that directly
            contains the file must be an integer class index.

    Returns:
        A pair ``(img, label)``: ``img`` is the 3-channel image converted to
        ``tf.float32`` with ``tf.image.convert_image_dtype``; ``label`` is a
        one-hot vector of length 3 whose entries are the integers 1 and 0.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    img = tf.image.convert_image_dtype(decoded, tf.float32)

    # Second-to-last path component == name of the containing directory.
    path_components = tf.strings.split(image_path, os.path.sep)
    class_index = int(path_components[-2])
    label = tf.one_hot(class_index, 3, on_value=1, off_value=0, axis=-1)
    return (img, label)

# Not in TF 2.2.0 yet
#train_ds = tf.keras.preprocessing.image_dataset_from_directory(
#    data_dir,
#    batch_size=batch_size,
#    image_size=(sizey, sizex),
#    shuffle=True,
#    seed=None,
#    validation_split=0.2,
#    subset='training',
#    interpolation='nearest'
#)

# NOTE(review): `all_files` (the list of image paths) and `augment` (the
# per-sample augmentation function) are not defined in any cell visible here --
# confirm they exist before this cell runs on a fresh kernel.
ds_size = len(all_files)
val_size = math.floor(ds_size * validation_ratio)

# Shuffle and split the *paths*, and decode the images only afterwards.
# Shuffling after the map stage forces the shuffle buffer (sized to the whole
# data set) to hold every decoded image in memory; a buffer of path strings is
# tiny by comparison.
# Use seed to ensure we always have the same validation data set! The fixed
# seed together with reshuffle_each_iteration=False yields the same order on
# every pass, so take()/skip() below always produce the same split.
shuffled_paths = (
    tf.data.Dataset.from_tensor_slices(all_files)
                   .shuffle(ds_size, seed=123, reshuffle_each_iteration=False)
)
val_paths = shuffled_paths.take(val_size)
train_paths = shuffled_paths.skip(val_size)

# Full parsed data set in the fixed shuffled order (name kept for any cell
# that still refers to input_ds).
input_ds = shuffled_paths.map(parse_images, num_parallel_calls=AUTO)

validation_ds = (
    val_paths.map(parse_images, num_parallel_calls=AUTO)
             .batch(batch_size)
)
# Un-augmented, un-batched training samples (used for the class statistics).
train_ds = train_paths.map(parse_images, num_parallel_calls=AUTO)

# Training input: reshuffle the training paths every epoch, then decode,
# augment, batch and prefetch.
train_ds_aug = (
    train_paths.shuffle(ds_size - val_size, reshuffle_each_iteration=True)
               .map(parse_images, num_parallel_calls=AUTO)
               .map(augment, num_parallel_calls=AUTO)
               .batch(batch_size)
               .prefetch(AUTO)
)

# Weight classes for loss function.
# Count the training samples per class by the position of the 1 in each
# one-hot label; this does not depend on how numpy formats arrays as strings.
n_classes = 3
label_counts = np.zeros(n_classes, dtype=np.int64)
for _image, label in train_ds:
    label_counts[int(np.argmax(label.numpy()))] += 1

# Same keys as before ('[1 0 0]', '[0 1 0]', '[0 0 1]'), listed in class order.
label_dict = {
    np.array2string(np.eye(n_classes, dtype=np.int64)[class_idx]): int(count)
    for class_idx, count in enumerate(label_counts)
}
print(label_dict)

# A class with no training samples has no defined weight; report it explicitly
# instead of failing with a KeyError / division by zero.
missing_classes = [idx for idx, count in enumerate(label_counts) if count == 0]
if missing_classes:
    raise ValueError(
        "No training samples for class index(es) %s; cannot compute class weights"
        % missing_classes
    )

sum_0, sum_1, sum_2 = (int(count) for count in label_counts)
total_sum = sum_0 + sum_1 + sum_2
print("Total # of samples: ", total_sum)
# Balanced weighting: weight_k = (total / n_classes) / count_k, so a class
# holding exactly 1/n_classes of the samples gets weight 1.
class_weight = {0: (total_sum / 3) / sum_0,
                1: (total_sum / 3) / sum_1,
                2: (total_sum / 3) / sum_2}
print("Class weights: ", class_weight)

# Build model: ResNet34 backbone created with weights='imagenet' and without
# its top (include_top=False), followed by a small classification head.
base_model = ResNet34(
    input_shape=(sizey, sizex, 3),
    include_top=False,
    weights='imagenet',
)

# TODO: Only retrain the last X layers (e.g. last 30)
#for l in base_model.layers[:-30]:
#    l.trainable = False
#for layer in base_model.layers:
#    print(layer, layer.trainable)

# Head: global average pooling over the backbone output, then a 3-way softmax.
pooled_features = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
output = tf.keras.layers.Dense(units=3, activation='softmax')(pooled_features)
model = tf.keras.models.Model(inputs=[base_model.input], outputs=[output])

# Warm start: if a checkpoint file from an earlier run is present, load its
# weights into the freshly built model before compiling.
snapshot_weights = 'best_weights.hdf5'
snapshot_available = os.path.isfile(snapshot_weights)
if snapshot_available:
    model.load_weights(snapshot_weights)


model.compile(
    loss='categorical_crossentropy',
    optimizer='SGD',
    metrics=['accuracy'],
)

# TensorBoard: every run logs to its own path, log_dir followed by a
# start-time stamp, so runs show up separately.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir_s = log_dir + run_stamp
os.makedirs(log_dir_s, exist_ok=True)
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir_s,
    histogram_freq=1,
    update_freq='batch',
    profile_batch=0,
)

# Checkpointing: monitor val_accuracy and write to snapshot_weights only when
# it improves (save_best_only=True).
checkpointer = tf.keras.callbacks.ModelCheckpoint(
    filepath=snapshot_weights,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

callbacks = [tensorboard_callback, checkpointer]

# Train on the augmented training batches, validating on validation_ds after
# each epoch and applying the per-class weights to the loss.
fit_options = dict(
    validation_data=validation_ds,
    epochs=nb_epochs,
    callbacks=callbacks,
    class_weight=class_weight,
)
model.fit(train_ds_aug, **fit_options)

# Store the model as it is after the last epoch. The checkpoint callback has
# already written the best-val_accuracy state to its own file.
model.save('trained.h5') # Not really needed as we have the weights already