In [None]:
import os
from classification_models.tfkeras import Classifiers

# Fetch the ResNet34 model builder and its companion preprocessing function
# from the classification_models registry.
# NOTE(review): preprocess_input is not called in any of the cells visible here -
# confirm the inputs are preprocessed the way the pretrained weights expect.
ResNet34, preprocess_input = Classifiers.get('resnet34')

In [2]:
# Root directory for the TensorBoard event files; created up front so that
# TensorBoard can be pointed at it before any training run has written logs.
log_dir = "logs/"
os.makedirs(log_dir, exist_ok=True)

# Load (or reload) the TensorBoard notebook extension and start it on log_dir.
# NOTE(review): --host 0.0.0.0 presumably exposes the UI on every network
# interface - confirm that is intended.
# NOTE(review): the port is fixed at 6006; the saved output of this cell shows the
# launch failing because that port was already in use.
%reload_ext tensorboard
%tensorboard --logdir {log_dir} --reload_multifile True --host 0.0.0.0 --port 6006

ERROR: Failed to launch TensorBoard (exited with 255).
Contents of stderr:
E0714 09:07:32.289144 140375052138304 program.py:311] TensorBoard could not bind to port 6006, it was already in use
ERROR: TensorBoard could not bind to port 6006, it was already in use

In [3]:
# Run only for training!
import glob
import math
import datetime
import numpy as np
import tensorflow as tf

# Change parameters here...
data_dir = '../data/' # Change: directory that is searched for the dataset files
sizex = 500  # image width in pixels (second entry of the model input shape)
sizey = 500  # image height in pixels (first entry of the model input shape)
sizez = 3  # presumably the number of image channels (the loaded arrays end in a dimension of 3)
batch_size = 32  # number of samples per batch when the tf.data dataset is batched
nb_epochs = 50  # number of epochs passed to model.fit
validation_ratio = 0.3  # presumably the fraction of data held out for validation; not used in the cells visible here

In [4]:
# Collect every file in data_dir whose name starts with "dataset" (the .npz archives).
# glob returns paths in arbitrary, filesystem-dependent order, so the list is sorted:
# the loading cells take numpy_datasets[:4] and must pick the same files on every run.
# os.path.join keeps the pattern valid whether or not data_dir ends with a slash.
numpy_datasets = sorted(glob.glob(os.path.join(data_dir, 'dataset*')))

In [5]:
# METHOD 1: (first 4 files)
# Load the data from the npz files into plain numpy arrays.
#
# The arrays of each file are collected in lists and concatenated once at the end.
# Compared with growing x_dust/x_nodust/y/ids via np.append inside a try/except:
#   * the result no longer depends on whether those names already exist in the
#     kernel, so re-running the cell does not append the same files a second time;
#   * errors while reading a file are raised instead of being swallowed by a bare except;
#   * the accumulated data is copied once, not once per file.
selected_files = numpy_datasets[:4]
if not selected_files:
    # Fail here with a clear message rather than with a NameError in a later cell.
    raise FileNotFoundError("numpy_datasets is empty: no dataset files to load")

x_dust_parts, x_nodust_parts, y_parts, ids_parts = [], [], [], []
for npz_path in selected_files:
    with np.load(npz_path) as data:
        # Indexing the archive reads the array into memory, so it stays valid
        # after the file is closed.
        x_dust_parts.append(data['x_dust'])
        x_nodust_parts.append(data['x_nodust'])
        y_parts.append(data['y'])
        ids_parts.append(data['id'])

x_dust   = np.concatenate(x_dust_parts, axis=0)
x_nodust = np.concatenate(x_nodust_parts, axis=0)
y        = np.concatenate(y_parts, axis=0)
ids      = np.concatenate(ids_parts, axis=0)

# Drop the per-file pieces so the kernel does not keep a second copy of the data alive.
del x_dust_parts, x_nodust_parts, y_parts, ids_parts

In [6]:
# METHOD 2: (first 4 files)
# Load the data from the npz files into a dictionary of arrays.
#
# Maps each key of data_dict to the name of the array inside the npz files
# (the archives store the identifiers as 'id', the dictionary exposes them as 'ids').
npz_keys = {'x_dust': 'x_dust', 'x_nodust': 'x_nodust', 'y': 'y', 'ids': 'id'}

# Gather the per-file arrays first ...
chunks = {key: [] for key in npz_keys}
for npz_path in numpy_datasets[:4]:
    with np.load(npz_path) as data:
        for key, npz_name in npz_keys.items():
            chunks[key].append(data[npz_name])

# ... then concatenate once per key. np.append inside the loop would re-copy the
# whole accumulated array for every additional file.
# When no file was found, data_dict stays empty.
data_dict = {key: np.concatenate(parts, axis=0) for key, parts in chunks.items() if parts}

# Drop the per-file pieces so the kernel does not keep a second copy of the data alive.
del chunks

In [7]:
# check the shape of the array (100 images of 500x500x3 = 4 files of 25 images)
x_dust.shape

(100, 500, 500, 3)

In [8]:
# check the shape of the 'x_dust' entry of the dictionary
# (100 images of 500x500x3 = 4 files of 25 images)
data_dict['x_dust'].shape

(100, 500, 500, 3)

In [9]:
# Create the dataset from the dictionary: every element is a dict holding one sample,
# i.e. one slice along the first axis of each array. Printing it shows the
# per-element shapes and dtypes.
# NOTE(review): the elements are dicts, not (input, label) pairs - presumably they
# have to be mapped before the dataset can be passed to model.fit; confirm.
dataset = tf.data.Dataset.from_tensor_slices(data_dict)
print(dataset)

<TensorSliceDataset shapes: {x_dust: (500, 500, 3), x_nodust: (500, 500, 3), y: (3,), ids: ()}, types: {x_dust: tf.float64, x_nodust: tf.float64, y: tf.float64, ids: tf.string}>


In [10]:
# Shuffle the individual samples with a fixed seed, then group them into batches.
# Shuffling has to come before batching: shuffling an already-batched dataset only
# permutes whole batches, so every batch would hold the same samples in every epoch.
# NOTE(review): a buffer of 10 only shuffles locally; raise it towards the number of
# samples for a uniform shuffle if memory allows.
dataset = dataset.shuffle(10, seed=123)
dataset = dataset.batch(batch_size)

# Sentinel that lets tf.data pick parallelism / prefetch sizes automatically.
AUTO = tf.data.experimental.AUTOTUNE

In [3]:
# Build model: ResNet34 backbone with ImageNet weights and without its classification head.
# The channel count is taken from sizez so the input shape follows the parameter cell
# and does not repeat the literal 3.
base_model = ResNet34(input_shape=(sizey, sizex, sizez), weights='imagenet', include_top=False)

# TODO: Only retrain the last X layers (e.g. last 30)
#for l in base_model.layers[:-30]:
#    l.trainable = False
#for layer in base_model.layers:
#    print(layer, layer.trainable)

# New head: global average pooling followed by a 3-way softmax classifier.
x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
output = tf.keras.layers.Dense(3, activation='softmax')(x)
model = tf.keras.models.Model(inputs=[base_model.input], outputs=[output])

# Use existing best weights if available...
snapshot_weights = 'best_weights.hdf5'
if os.path.isfile(snapshot_weights):
    model.load_weights(snapshot_weights)

model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

# For tensorboard: every run logs into its own sub-directory named after the start time.
# os.path.join keeps the path valid whether or not log_dir ends with a slash.
log_dir_s = os.path.join(log_dir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
os.makedirs(log_dir_s, exist_ok=True)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir_s, histogram_freq=1, update_freq='batch', profile_batch=0)

# Checkpointing: overwrite snapshot_weights whenever the validation accuracy improves.
checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath=snapshot_weights,
                               monitor='val_accuracy',
                               verbose=1,
                               save_best_only=True)

callbacks = [
    tensorboard_callback,
    checkpointer
]

# NOTE(review): train_ds_aug, validation_ds and class_weight are not defined in any
# cell visible here - they must already exist in the kernel before this cell runs.
model.fit(
    train_ds_aug,
    validation_data = validation_ds,
    epochs = nb_epochs,
    callbacks = callbacks,
    class_weight=class_weight)

model.save('trained.h5') # Not really needed as we have the weights already