In [None]:
%matplotlib notebook

from collections import Counter
import json
import os
import sys
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from imgaug import augmenters as iaa
from keras.applications.imagenet_utils import _IMAGENET_MEAN
# from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.applications.resnet50 import preprocess_input, ResNet50
# from keras.applications.mobilenet import preprocess_input, MobileNet
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, TensorBoard, EarlyStopping
from keras.optimizers import Nadam
from pycocotools.coco import COCO
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
from skimage.transform import resize

from abyss_deep_learning.keras.classification import (
    ClassificationDataset, caption_map_gen, onehot_gen, augmentation_gen, PRTensorBoard)
from abyss_deep_learning.keras.utils import batching_gen, lambda_gen
import abyss_deep_learning.abyss_dataset as dataset_model
from herbicide.utils import vis_square

# Setup Data

In [None]:
############### CONFIGURE THIS ########################
# num_classes assumed from caption_map entries
# image_dims = (224, 224, 3) # Preset for Mobilenet
# Target image shape: preprocess() resizes every image to this before it reaches the network.
# NOTE(review): the preset comment names InceptionV3, whose import is commented out in the
# imports cell -- confirm the size matches the backbone actually in use.
image_dims = (299, 299, 3) # Preset for InceptionV3
# Used to derive the steps-per-epoch values and passed to batching_gen / the TensorBoard callback.
batch_size = 4
# dtype that preprocess() casts images to (also used for the class-weight array).
NN_DTYPE = np.float32

# maps caption strings to class numbers (ensure minimal set of class numbers)
# eg use {0, 1, 2} not {4, 7, 8}

# Caption type can be either "single" or "multi".
# This sets up various parameters in the system.
# If conversion between single and multi is required this should be done explicitly and presented
# in a separate json file. The internal representation of all the labels is one-hot encoding.
caption_type = "multi" 
caption_map = {
    "IP": 0,
    "JD_ML": 1,
    "DD": 2,
    "JD_S": 3,
    "ED_All": 4
}
# caption_map = {
#     'f': 1,
#     's': 0
# }
# One dataset object per split; all three share the same caption_map.
coco_train = ClassificationDataset(caption_map, "/data/abyss/projectmax/feature-detection/2/all2_train.json")
coco_val = ClassificationDataset(caption_map, "/data/abyss/projectmax/feature-detection/2/all2_val.json")
coco_test = ClassificationDataset(caption_map, "/data/abyss/projectmax/feature-detection/2/all2_test.json")

In [None]:
# # Convert instances to categories
# from abyss_deep_learning.coco_classes import CocoDataset
# ds = CocoDataset.from_COCO(coco_train)
# ds.convert_instances_to_captions()
# ds.save("/data/abyss/projectmax/feature-detection/2/training2.json")

In [None]:
# Reverse lookup (class number -> caption string), used for plot titles and saved model metadata.
caption_map_r = {val: key for key, val in caption_map.items()}
num_classes = len(caption_map)
# Floor division: a trailing partial batch is not counted as a step.
steps_per_epoch = coco_train.num_images() // batch_size
steps_per_epoch_val = coco_val.num_images() // batch_size
print("Number of classes:", num_classes)
print("Steps per epoch:", steps_per_epoch)
print("Steps per steps_per_epoch_val:", steps_per_epoch_val)

In [None]:
def preprocess(image, caption):
    """Resize ``image`` to ``image_dims`` and scale it for the network.

    The image is resized with its value range preserved, cast to ``NN_DTYPE``
    and passed through ``preprocess_input`` in ``'tf'`` mode. ``caption`` is
    returned untouched so the function can be mapped over (image, caption) pairs.
    """
    resized = resize(image, image_dims, preserve_range=True)
    network_input = preprocess_input(resized.astype(NN_DTYPE), mode='tf')
    return network_input, caption

def postprocess(image):
    """Map a network-space image back to displayable ``uint8`` pixels.

    Inverts the scaling used by ``preprocess`` (``[-1, 1]`` -> ``[0, 255]``).
    Values are clipped to ``[0, 255]`` before the cast: without the clip any
    value even slightly outside ``[-1, 1]`` would wrap around in ``uint8`` and
    show up as a wrong-coloured pixel.

    Parameters
    ----------
    image : array-like of float
        Image in the ``[-1, 1]`` range.

    Returns
    -------
    numpy.ndarray of uint8 with the same shape as ``image``.
    """
    pixels = (np.asarray(image) + 1) * 127.5
    return np.clip(pixels, 0, 255).astype(np.uint8)

def cast_dtype_gen(gen, input_dtype, target_dtype):
    """Yield every (inputs, targets) pair from ``gen`` with both arrays re-cast.

    ``inputs`` is cast to ``input_dtype`` and ``targets`` to ``target_dtype``
    via their ``astype`` methods; pairs are produced lazily, in order.
    """
    yield from (
        (inputs.astype(input_dtype), targets.astype(target_dtype))
        for inputs, targets in gen
    )

def pipeline(gen, aug_config=None):
    """Chain the data generators that turn raw dataset samples into network input.

    Stages, innermost first: ``caption_map_gen`` (with ``caption_map``),
    ``lambda_gen`` applying ``preprocess``, ``onehot_gen`` with ``num_classes``,
    and finally ``augmentation_gen``. Augmentation is enabled only when
    ``aug_config`` is not ``None``.
    """
    mapped = caption_map_gen(gen, caption_map, background='background', skip_bg=True)
    preprocessed = lambda_gen(mapped, func=preprocess)
    encoded = onehot_gen(preprocessed, num_classes=num_classes)
    use_augmentation = aug_config is not None
    return augmentation_gen(encoded, aug_config, enable=use_augmentation)

def augmentation_gen(gen, aug_config, enable=True):
    '''
    Data augmentation for classification task.
    Target is untouched.

    Parameters
    ----------
    gen : iterable of (image, target) pairs.
    aug_config : dict or None
        Recognised keys: 'flip_lr_percentage', 'flip_ud_percentage' (passed to
        iaa.Fliplr / iaa.Flipud) and 'affine' (dict of keyword arguments for
        iaa.Affine).
    enable : bool
        When False the pairs from `gen` are passed through unchanged.

    Yields
    ------
    (image, target) pairs; only the image is augmented.
    '''
    # Pass-through mode. The generator must end when `gen` ends: re-iterating an
    # exhausted generator yields nothing, so wrapping this in `while True` would
    # spin forever instead of terminating. A missing/empty config is treated the
    # same way rather than failing on `'key' in None`.
    if not enable or not aug_config:
        yield from gen
        return
    aug_list = []
    if 'flip_lr_percentage' in aug_config:
        aug_list += [iaa.Fliplr(aug_config['flip_lr_percentage'])]
    if 'flip_ud_percentage' in aug_config:
        aug_list += [iaa.Flipud(aug_config['flip_ud_percentage'])]
    if 'affine' in aug_config:
        aug_list += [iaa.Affine(**aug_config['affine'])]
#     if 'color' in aug_config: #  Color aug not working  yet
#         aug_list += [iaa.Sometimes(
#             aug_config['color']['probability'], iaa.Sequential([
#             iaa.ChangeColorspace(from_colorspace="RGB", to_colorspace="HSV"),
#             iaa.WithChannels(0, iaa.Add(aug_config['color']['hue'])),
#             iaa.WithChannels(1, iaa.Add(aug_config['color']['saturation'])),
#             iaa.WithChannels(2, iaa.Add(aug_config['color']['value'])),
#             iaa.ChangeColorspace(from_colorspace="HSV", to_colorspace="RGB")
#         ]))]
    seq = iaa.Sequential(aug_list)
    for image, target in gen:
        yield seq.augment_image(image), target
        
# Augmentation settings consumed by augmentation_gen(); only the training pipeline receives them.
aug_config = {
    # Passed to iaa.Fliplr / iaa.Flipud respectively.
    'flip_lr_percentage': 0.5,
    'flip_ud_percentage': 0.5,
    # Expanded as keyword arguments into iaa.Affine(**...).
    'affine': {
        "order": 1,
        'scale': {
            "x": (0.8, 1.2),
            "y": (0.8, 1.2)
        },
        "rotate": (-10, 10),
        "shear": (-5, 5),
        "mode": 'constant'
    },
    # Colour augmentation is disabled here and in augmentation_gen().
#     'color': {
#         'probability': 1.00,
#         'hue': (0, 0),
#         'saturation': (0, 0),
#         'value': (0, 0)
#     }
}
# aug_config = None

In [None]:
# Build one generator per split; only the training split is augmented.
train_gen = pipeline(coco_train.generator(shuffle_ids=True), aug_config=aug_config)
val_gen = pipeline(coco_val.generator(shuffle_ids=True))
test_gen = pipeline(coco_test.generator(shuffle_ids=True))

# Pull one sample from each split and show them side by side as a sanity check.
for i, (train, val, test) in enumerate(zip(train_gen, val_gen, test_gen)):
    samples = (train, val, test)
    for data in samples:
        print(data[0].shape, data[1], (np.min(data[0]), np.max(data[0])))
    plt.figure()
    for panel, sample in enumerate(samples, start=1):
        plt.subplot(1, 3, panel)
        plt.imshow(postprocess(sample[0]))
        # Title lists the caption of every active entry in the one-hot target.
        plt.title(', '.join([caption_map_r[int(cap_id)] for cap_id in np.argwhere(sample[1])]))

    # Only the first triple is displayed.
    if i >= 0:
        break
print("Left to right: ground truth samples from train, val test")

In [None]:
def gen_dump_data(gen, num_images):
    """Materialise up to ``num_images`` samples from ``gen`` into two arrays.

    Parameters
    ----------
    gen : iterable of (image, caption) array pairs, all of identical shapes.
    num_images : int
        Maximum number of samples to collect.

    Returns
    -------
    (images, captions) : tuple of numpy.ndarray
        Each stacked along a new leading axis.

    Raises
    ------
    ValueError
        If no sample was collected (``num_images`` <= 0 or ``gen`` is empty).
    """
    images, captions = [], []
    # zip() consults range() first, so exactly `num_images` samples are taken and
    # no extra item is pulled from `gen` once the quota is reached. (Breaking on
    # `i >= num_images` after appending collected one sample too many.)
    for _, (image, caption) in zip(range(num_images), gen):
        images.append(image)
        captions.append(caption)
    return np.stack(images, axis=0), np.stack(captions, axis=0)

def count_labels_single(data):
    """Count how often each class index is set across the one-hot targets.

    ``data[1]`` is iterated row by row; every non-zero position in a row
    contributes one count for that class index.

    Returns a ``collections.Counter`` keyed by plain Python ``int`` class index.
    """
    # np.flatnonzero yields scalar indices directly; calling int() on the
    # 1-element arrays returned by np.argwhere is deprecated in recent NumPy.
    return Counter(
        int(class_id)
        for onehot in data[1]
        for class_id in np.flatnonzero(onehot)
    )

def count_labels_multi(data):
    """Sum the targets in ``data[1]`` over the sample axis.

    Returns a dict mapping each column index (from ``np.arange``) to the
    column total.
    """
    per_class_totals = np.sum(data[1], axis=0).tolist()
    class_ids = np.arange(len(per_class_totals))
    return {class_id: total for class_id, total in zip(class_ids, per_class_totals)}

def calc_class_weights(gen, coco):
    """Derive per-class loss weights from the label frequencies seen in ``gen``.

    ``coco.num_images()`` samples are dumped from ``gen`` and counted with the
    notebook-level ``count_function``. Weights are *inversely* proportional to
    the class count and scaled so the rarest observed class gets 1.0: rare
    classes therefore contribute more to the loss than common ones. (Dividing
    the counts by their maximum did the opposite and reinforced the imbalance.)
    Classes that were never observed get weight 0.0.

    Returns
    -------
    dict mapping class id -> float weight in [0, 1].
    """
    data = gen_dump_data(gen, coco.num_images())
    counts = count_function(data)
    class_ids = sorted(counts.keys())
    class_counts = np.array([counts[class_id] for class_id in class_ids], dtype=NN_DTYPE)
    class_weights = np.zeros_like(class_counts)
    observed = class_counts > 0
    if observed.any():
        # rarest observed class -> 1.0, more frequent classes -> proportionally less
        class_weights[observed] = class_counts[observed].min() / class_counts[observed]
    return dict(zip(class_ids, class_weights.tolist()))

In [None]:
# Label counter matching the caption type; calc_class_weights() reads this global.
count_function = count_labels_single if caption_type == "single" else count_labels_multi

# Print the label distribution of each split (one dump of the whole split per generator).
for label, gen, coco in zip(
        ["train", "val", "test"],
        [train_gen, val_gen, test_gen],
        [coco_train, coco_val, coco_test]):
    data = gen_dump_data(gen, coco.num_images())
    counter = count_function(data)
    print(label, counter)

# Validation set held in memory; later cells pass it as `validation_data`.
val_data = gen_dump_data(val_gen, coco_val.num_images())
# Computed from train_gen, i.e. from the augmented training pipeline.
class_weights = calc_class_weights(train_gen, coco_train)
print("training class weights:")
print(class_weights)

# Setup model

In [None]:
from keras.layers import Dense, MaxPooling2D, Dropout, Flatten, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.models import Model
from keras.losses import binary_crossentropy
import keras.initializers
import keras.regularizers
from keras.layers import Input, Dense, Conv2D, MaxPool2D, ZeroPadding2D, Dropout, BatchNormalization, Activation


In [None]:
def create_new_head(
    base_model, num_classes, caption_type,
    pooling=None, num_hidden_neurons=1024, num_hidden_layers=2,
    dropout=None,
    train_features=False, opt_params={}, l2_reg=None):
    '''Attach a dense classification head to `base_model` and compile the result.

    make sure base_model has include_top=False

    Parameters
    ----------
    base_model : keras Model used as feature extractor.
    num_classes : number of output units.
    caption_type : "single" (softmax + categorical cross-entropy) or
        "multi" (sigmoid + binary cross-entropy).
    pooling : 'avg', 'max' or anything else (-> Flatten) between backbone and head.
    num_hidden_neurons, num_hidden_layers : size of the dense stack.
    dropout : dropout rate applied after every hidden layer, or None/0 to disable.
    train_features : whether backbone layers from index 11 onwards are trainable.
    opt_params : keyword arguments for `model.compile`; not modified.
    l2_reg : L2 factor assigned to trainable layers (see NOTE below).

    Side effect: appends pooling/head descriptors to the global `model_name`.

    Returns the compiled keras Model.
    '''

    global model_name

    model_name += "_P" + str(pooling).lower()
    model_name += "_L{:d}_U{:d}".format(num_hidden_layers, num_hidden_neurons)

    # Work on a private copy so neither the caller's dict nor the shared default
    # argument is mutated when the loss is filled in below.
    opt_params = dict(opt_params) if opt_params else {"optimizer": "Nadam"}

    if caption_type == "single":
        opt_params['loss'] = "categorical_crossentropy"
    elif caption_type == "multi":
        opt_params['loss'] = binary_crossentropy

    activation = "softmax" if caption_type == "single" else "sigmoid"

    x = base_model.output

    # Reduce the backbone feature map to a vector.
    if pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)
    else:
        x = Flatten()(x)
    for i in range(num_hidden_layers):
        x = Dense(num_hidden_neurons, activation='relu',
                  kernel_initializer=keras.initializers.he_uniform())(x)
        if dropout:
            x = Dropout(dropout)(x)
    predictions = Dense(
        num_classes,
        activation=activation,
        kernel_initializer=keras.initializers.he_uniform(),
        name='class_logits')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    # Only backbone layers are (un)frozen. Iterating over `model.layers[11:]`
    # also covered the freshly initialised head, so with train_features=False
    # the head itself was frozen and could never learn.
    # NOTE(review): the first 11 backbone layers keep their existing `trainable`
    # state, as before -- confirm that is the intended split.
    for layer in base_model.layers[11:]:
        layer.trainable = train_features
    # compile the model (should be done *after* setting layers to non-trainable)

    if l2_reg:
        # NOTE(review): these attributes are assigned after the layers were
        # built and called; whether Keras picks them up for the compiled model
        # should be verified (they do end up in the serialised layer config).
        for layer in model.layers[11:]:
            if not layer.trainable:
                continue
            if hasattr(layer, 'kernel_regularizer'):
                layer.activity_regularizer = keras.regularizers.l2(l2_reg)
                layer.kernel_initializer = keras.initializers.he_uniform()
#             if hasattr(layer, 'activity_regularizer'):
#                 layer.activity_regularizer = keras.regularizers.l1(l2_reg)

    print("Compiling model:")
    print("   l2 reg :", l2_reg)
    print("   activation:", activation)
    print("   optimizer:", opt_params)
    model.compile(**opt_params, metrics=['accuracy'])
    return model

In [None]:
from itertools import cycle
from sklearn.metrics import precision_recall_curve, average_precision_score, accuracy_score
from sklearn.metrics import accuracy_score

def evaluate_model(model):
    """Evaluate `model` on one dump of the test generator.

    Uses the notebook globals `test_gen`, `coco_test` and `caption_type`.
    Prints the accuracy, the Keras test metrics and a per-class ratio.

    Returns
    -------
    (Y_true, Y_pred, TP, test_metrics)
        `TP` marks where prediction and ground truth agree: element-wise for
        multi-label, per-sample (argmax) for single-label.
    """
    def multi_label_decision(y_true, y_pred, thresh=0.5):
        return (y_true > thresh) == (y_pred > thresh)
    def single_label_decision(y_true, y_pred):
        return np.argmax(y_true, axis=-1) == np.argmax(y_pred, axis=-1)

    thresh = 0.5 # Used for multi-label decisions

    test_data = gen_dump_data(test_gen, coco_test.num_images())
    Y_true = test_data[1]
    Y_pred = model.predict(test_data[0])
    if caption_type == 'single':
        TP = single_label_decision(Y_true, Y_pred)
    else:
        # Pass the threshold explicitly so it cannot drift from the one used below.
        TP = multi_label_decision(Y_true, Y_pred, thresh)
    # Builtin `bool`: the `np.bool` alias was removed from NumPy.
    test_accuracy = accuracy_score(Y_true.astype(bool), Y_pred > thresh)
    print("Test accuracy for {:d} samples: {:.2f}".format(len(test_data[0]), test_accuracy))
    # for i, (image, true_caption, pred_caption) in enumerate(zip(test_data[0], test_data[1], Y_pred)):
    #     if i % 4 == 0:
    #         if i > 0:
    #             plt.tight_layout()
    #         if i >= 4:
    #             break
    #         if i < len(test_data[0]):
    #             plt.figure()
    #     plt.subplot(2, 2, 1 + (i % 4))
    #     plt.imshow(postprocess(image))
    #     plt.title("T: {:s}; P: {:s}".format(
    #         ', '.join([caption_map_r[int(cap_id)] for cap_id in np.argwhere(true_caption > thresh)]),
    #         ', '.join([caption_map_r[int(cap_id)] for cap_id in np.argwhere(pred_caption > thresh)])
    #     ))
    test_metrics = model.evaluate(test_data[0], test_data[1])
    print("test_metrics", test_metrics)
    # NOTE(review): `TP` counts every agreement (including correct negatives),
    # so this ratio is not a recall and can exceed 1 -- confirm what is intended.
    print(TP.sum(axis=0) / Y_true.sum(axis=0))
    return Y_true, Y_pred, TP, test_metrics

def display_performance(Y_true, Y_pred, TP):
    """Plot per-class and micro-averaged precision/recall curves and save the figure.

    Parameters
    ----------
    Y_true, Y_pred : arrays indexed as [sample, class]; ground truth and scores.
    TP : accepted for call compatibility; not used here.

    The figure is written to the notebook-level `model_plot_path`.
    """
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(num_classes):
        precision[i], recall[i], _ = precision_recall_curve(Y_true[:, i],
                                                            Y_pred[:, i])
        average_precision[i] = average_precision_score(Y_true[:, i], Y_pred[:, i])

    # A "micro-average": quantifying score on all classes jointly
    precision["micro"], recall["micro"], _ = precision_recall_curve(Y_true.ravel(),
        Y_pred.ravel())
    average_precision["micro"] = average_precision_score(Y_true, Y_pred,
                                                         average="micro")
    print('Average precision score, micro-averaged over all classes: {0:0.2f}'
          .format(average_precision["micro"]))
    print("Accuracy:", accuracy_score(Y_true, Y_pred > 0.5))

    # setup plot details
    colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])

    plt.figure(figsize=(8, 10))
    f_scores = np.linspace(0.2, 0.8, num=4)
    lines = []
    labels = []
    # Background iso-F1 curves.
    for f_score in f_scores:
        x = np.linspace(0.01, 1)
        y = f_score * x / (2 * x - f_score)
        l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2)
        plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))

    # Deliberately outside the loop: a single legend entry stands for all iso-F1 curves.
    lines.append(l)
    labels.append('iso-f1 curves')
    l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
    lines.append(l)
    labels.append('micro-average Precision-recall (area = {0:0.2f})'
                  ''.format(average_precision["micro"]))

    for i, color in zip(range(num_classes), colors):
        l, = plt.plot(recall[i], precision[i], color=color, lw=2)
        lines.append(l)
        labels.append('{0} (area = {1:0.2f})'
                      ''.format(caption_map_r[i], average_precision[i]))

    fig = plt.gcf()
    fig.subplots_adjust(bottom=0.25)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Micro Average Precision vs. Recall')
    plt.legend(lines, labels, loc=(0, -.4), prop=dict(size=14))
    # Save before show(): on backends where show() releases the figure, a later
    # savefig() would write an empty canvas.
    fig.savefig(model_plot_path, dpi=150)
    plt.show()
    
def save_model(model, name, class_map_r, prediction_type,
               model_weights_path, model_def_path, model_info_path, history,
               test_metrics=None, description=""):
    """Write the model definition (JSON), weights and a metadata JSON file.

    Parameters
    ----------
    model : keras Model containing a layer named "class_logits".
    name, description : free-text identifiers stored in the metadata.
    class_map_r : mapping class number -> caption, stored under "classes".
    prediction_type : stored under "prediction_type".
    model_weights_path, model_def_path, model_info_path : output file paths.
    history : keras History returned by fit/fit_generator.
    test_metrics : optional; stored under metrics/test when truthy.
    """
    from abyss.utils import JsonNumpyEncoder
    def merged(a, b):
        merged = dict(a)
        merged.update(b)
        return merged

    model_info = {
        "name": name,
        "description": description,
        "weights": model_weights_path,
        # Use the argument: reading the global `caption_type` silently ignored
        # whatever the caller passed in.
        "prediction_type": prediction_type,
        "model": model_def_path,
        "classes": class_map_r,
        "architecture": {
            # NOTE(review): hard-coded backbone name; it does not track the model
            # actually passed in -- confirm before relying on this field.
            "backbone": "inceptionv3",
            "logit_activation": model.get_layer("class_logits").activation.__name__,
            "input_shape": image_dims
        },
        "metrics": {
            "loss_function": str(history.model.loss),
            "train": merged(
                history.history,
                {
                    "epoch": history.epoch,
                    "params": history.params
                })
        }
    }
    if test_metrics:
        model_info['metrics']['test'] = test_metrics

    print("Writing model def to " + model_def_path)
    with open(model_def_path, "w") as file:
        file.write(model.to_json())

    print("Writing model weights to " + model_weights_path)
    model.save_weights(model_weights_path)

    print("Writing model info to " + model_info_path)
    with open(model_info_path, "w") as file:
        file.write(json.dumps(model_info, cls=JsonNumpyEncoder))

# Model selection

In [None]:
def model_resnet(input_shape, weights='imagenet'):
    """Return a headless ResNet50 backbone and set the global `model_name`.

    Parameters
    ----------
    input_shape : input shape forwarded to ResNet50.
    weights : forwarded to ResNet50 (default 'imagenet'). Previously this
        argument was accepted but ignored in favour of a hard-coded value.
    """
    global model_name
    model_name = 'model_resnet'
    return ResNet50(include_top=False, weights=weights, input_shape=input_shape)

def test_model(input_shape, dropout=0, activation=None, weights=None):
    """Build a small convolutional backbone (no classification head).

    Three stages of 3x3 convolutions (3, 2 and 1 convs with 64, 128 and 256
    filters), each conv followed by batch normalisation and an optional
    activation, each stage followed by optional dropout and 2x2 max pooling.

    Sets the global `model_name`. `weights` is accepted but not used.
    NOTE(review): `unit` reads `l2` from notebook scope (it is not a parameter),
    so this function only works after a cell has assigned `l2`.
    """
    global model_name
    model_name = 'test_model' + ('_' + activation if activation else '')

    def unit(x, filters, convs=1):
        # One regulariser instance is shared by all convs of this unit.
        reg = keras.regularizers.l2(l2) if l2 else None
        for i in range(convs):
            x = Conv2D(filters, (3, 3),
                       kernel_regularizer=reg,
                       kernel_initializer=keras.initializers.he_uniform())(x)
            x = BatchNormalization()(x)
            if activation:
                x = Activation(activation)(x)
        if dropout:
            x = Dropout(dropout)(x)
        return x
    
    x_in = Input(shape=input_shape)
    x = ZeroPadding2D(padding=(1, 1))(x_in)
    # Filter count doubles per stage: 64, 128, 256.
    for i, convs in zip(range(3), [3, 2, 1]):
        x = unit(x, int(64 * 2 ** i), convs=convs)
        x = MaxPool2D(pool_size=(2, 2))(x)
    model = Model(x_in, x)
    return model

def test_model_linear(input_shape):
    """Build an activation-free conv backbone and set the global `model_name`.

    The input is zero-padded by 5 pixels, then passed through four
    (3x3 conv -> 2x2 max-pool) stages with 32, 64, 128 and 256 filters.
    """
    global model_name
    model_name = 'test_model_linear'
    x_in = Input(shape=input_shape)
    features = ZeroPadding2D(padding=(5, 5))(x_in)
    for filters in (32, 64, 128, 256):
        conv = Conv2D(filters, (3, 3), kernel_initializer=keras.initializers.he_uniform())
        features = conv(features)
        features = MaxPool2D(pool_size=(2, 2))(features)
    return Model(x_in, features)

In [None]:
def make_model(model, train_features, pooling, dropout, l2_reg, num_hidden_layers, num_hidden_neurons, activation=None):
    """Wrap a backbone with a new classification head and compile it.

    When `model` is None the backbone defaults to `test_model` built with
    `image_dims`, `activation` and `dropout`. The head is created by
    `create_new_head` with a Nadam optimizer using clipnorm=5.
    """
    backbone = model
    if backbone is None:
        # Alternative backbones tried previously:
        #   model_resnet(input_shape=image_dims)
        #   InceptionV3(include_top=False, weights='imagenet', input_shape=image_dims)
        #   MobileNet(input_shape=image_dims, include_top=False)
        backbone = test_model(input_shape=image_dims, activation=activation, dropout=dropout)

    head_options = dict(
        num_hidden_layers=num_hidden_layers,
        num_hidden_neurons=num_hidden_neurons,
        pooling=pooling,
        train_features=train_features,
        dropout=dropout,
        l2_reg=l2_reg,
        opt_params={'optimizer': Nadam(clipnorm=5)},
    )
    return create_new_head(backbone, num_classes, caption_type, **head_options)

In [None]:
# Random hyper-parameter search: each attempt samples a configuration, builds a fresh
# model, trains it for 50 epochs, evaluates it on the test split and saves the result.
history_data = {}
class_weights__ = class_weights
for attempt_no in range(200):
    # Sample the configuration; learning rate and l2 are drawn log-uniformly.
    model_activation = np.random.choice([None, 'relu'])
    learning_rate = 10 ** np.random.uniform(-6, -3)
    # NOTE: test_model() also reads this `l2` as a notebook-level name.
    l2 = 10 ** np.random.uniform(-3, -0.7)
    dropout = np.random.uniform(0.1, 0.6)
    train_features = True #np.random.choice([True, False])
    pooling = np.random.choice(['avg', 'max'])
    num_hidden_layers = np.random.choice([1, 2])
    # Power of two between 2**3 and 2**12.
    num_hidden_neurons = int(2 ** np.floor(np.random.uniform(3, 13)))
    
    # Drop the previous attempt's graph before building the next model.
    K.clear_session()
    model = make_model(
        None,
        train_features, pooling,
        dropout, l2,
        num_hidden_layers, num_hidden_neurons, activation=model_activation)
    
    # model_name was set/extended as a side effect of make_model().
    key = (model_name, float(learning_rate), float(l2 or 0), bool(train_features))
    experiment_name = "{:s}--{:s}--CW{:d}--BS{:d}--LR{:.2e}_sched--reg{:.2e}".format(
        model_name,
        'all' if train_features else 'heads',
        1 if class_weights__ else 0,
        batch_size,
        learning_rate,
        l2 or 0)
    log_dir = "/data/log/cnn/fd/wednesday-search/" + experiment_name
    best_path = os.path.join(log_dir, "models/best.{epoch:03d}-{val_loss:.4f}.h5")
    # IPython shell escape: creates the checkpoint directory.
    !mkdir -p $log_dir/models
    print(experiment_name)
    print(log_dir)
    # NOTE(review): both ModelCheckpoint callbacks write to the same path template.
    callbacks=[
            ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=8, cooldown=0, verbose=1),
            ModelCheckpoint(
                best_path, monitor='val_loss', verbose=1,
                save_best_only=True, save_weights_only=True, mode='auto', period=1),
            ModelCheckpoint(
                best_path, monitor='val_loss', verbose=1,
                save_best_only=False, save_weights_only=True, mode='auto', period=20),
            PRTensorBoard(
                log_dir=log_dir, 
                histogram_freq=10, batch_size=batch_size,
                write_graph=True,
                write_grads=True,
                write_images=True),
#             EarlyStopping(
#                 monitor='val_loss', min_delta=0.0, patience=25, verbose=1, mode='auto')
    ]
    # The optimizer is created inside make_model(); apply the sampled learning rate here.
    K.set_value(model.optimizer.lr, learning_rate)
    history_data[key] = model.fit_generator(
        batching_gen(train_gen, batch_size=batch_size),
        validation_data=tuple(val_data),
        steps_per_epoch=steps_per_epoch,
        validation_steps=steps_per_epoch_val,
        class_weight=class_weights__,
        callbacks=callbacks, 
        epochs=50,
        verbose=0, initial_epoch=0, workers=10)
    
    # Fill in the relevant params below    
    # model_plot_path is read as a global by display_performance().
    model_def_path = os.path.join(log_dir, "model_def.json")
    model_weights_path = os.path.join(log_dir, "model_weights.h5")
    model_info_path = os.path.join(log_dir, "model.json")
    model_plot_path = os.path.join(log_dir, "precision-recall.png")
    (Y_true, Y_pred, TP, test_metrics) = evaluate_model(model)
    display_performance(Y_true, Y_pred, TP)
    
    save_model(
        model, name=experiment_name,
        class_map_r=caption_map_r, prediction_type=caption_type,
        model_weights_path=model_weights_path, model_def_path=model_def_path, model_info_path=model_info_path,
        test_metrics=test_metrics, history=history_data[key],
        description="Test model for 5 FDs"
    )

# Learning Rate Search
Model must be constant by this point.

In [None]:
# Learning-rate sweep: train the same initial weights for a few epochs at several
# randomly drawn learning rates and record the loss history of each run.
K.clear_session()
model = make_model(
    None,
    train_features, pooling,
    dropout, l2,
    num_hidden_layers, num_hidden_neurons)
model.save_weights("/data/tmp/base.h5")

# Both names are required by the loop below; they were commented out, so the
# cell failed with NameError on a fresh kernel.
train_data = gen_dump_data(train_gen, coco_train.num_images())
history_lr = {}
for lr in 10 ** np.random.uniform(-7.85, -6.25, size=3):
    # Restart every run from the same initial weights.
    model.load_weights("/data/tmp/base.h5")
    # Apply the sampled rate: without this every run used the optimizer default
    # and the sweep compared identical settings.
    K.set_value(model.optimizer.lr, lr)
    history = model.fit(
        train_data[0], train_data[1],
        class_weight=class_weights__, batch_size=batch_size,
        validation_data=val_data,
        epochs=5, verbose=1)
    history_lr[float(lr)] = history.history
# Release the in-memory copy of the training set.
del train_data

In [None]:
from itertools import cycle
# Sort once so the learning rates and both loss series stay index-aligned.
history_by_lr = sorted(history_lr.items(), key=lambda x: x[0])
lrs = [lr for lr, _ in history_by_lr]
val_loss = [run['val_loss'][-1] for _, run in history_by_lr]
# Training loss comes from the 'loss' key; reading 'val_loss' here made both
# panels show the same data.
loss = [run['loss'][-1] for _, run in history_by_lr]

plt.figure()
plt.subplot(1, 2, 1)
plt.semilogx(lrs, loss, '.')
plt.title("Final training loss")
plt.xlabel("Learning rate")
plt.subplot(1, 2, 2)
plt.semilogx(lrs, val_loss, '.')
plt.title("Final validation loss")
plt.xlabel("Learning rate")

In [None]:
# Continue training specific model
# Relies on `model`, `best_path`, `log_dir` and `class_weights__` left behind by earlier cells.
# NOTE(review): initial_epoch=80 assumes the model has already been trained for 80 epochs -- confirm.
K.set_value(model.optimizer.lr, 3e-4)
model.fit_generator(
    batching_gen(train_gen, batch_size=batch_size),
    validation_data=tuple(val_data),
    steps_per_epoch=steps_per_epoch,
    validation_steps=steps_per_epoch_val,
    class_weight=class_weights__,
    callbacks=[
            ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=6, cooldown=2, verbose=1),
            ModelCheckpoint(
                best_path, monitor='val_loss', verbose=1,
                save_best_only=True, save_weights_only=True, mode='auto', period=1),
            ModelCheckpoint(
                best_path, monitor='val_loss', verbose=1,
                save_best_only=False, save_weights_only=True, mode='auto', period=20),
            PRTensorBoard(
                log_dir=log_dir, 
                histogram_freq=0, batch_size=batch_size,
                write_graph=True,
                write_grads=False,
                write_images=False),
    ], 
    epochs=200,
    verbose=1, initial_epoch=80, workers=10)

# Model Testing

In [None]:
# Evaluate the current `model` on the test split and plot its precision/recall curves.
# display_performance() saves the figure to the global `model_plot_path`.
(Y_true, Y_pred, TP, test_metrics) = evaluate_model(model)
display_performance(Y_true, Y_pred, TP)

In [None]:
# Show the training class weights keyed by caption string instead of class number.
{caption_map_r[i]:v for i, v in class_weights__.items()}

In [None]:
# evaluate_model() keeps its test arrays local, so `test_data` does not exist at
# notebook scope. Dump a fresh test set here and recompute predictions and
# decisions from it so that images, labels and masks are guaranteed to line up.
# (Multi-label only: the row-wise reduction below needs a 2-D decision array.)
test_data = gen_dump_data(test_gen, coco_test.num_images())
Y_true = test_data[1]
Y_pred = model.predict(test_data[0])
TP = (Y_true > 0.5) == (Y_pred > 0.5)

# A sample counts as correct only if every class decision matches.
TP_mask = np.logical_and.reduce(TP, axis=1)
right = test_data[0][TP_mask]
wrong = test_data[0][~TP_mask]
print("incorrectly predicted images:", wrong.shape)
plt.figure()
vis_square(wrong)
plt.title("Incorrectly Predicted")
plt.figure()
vis_square(right)
plt.title("Correctly Predicted")

# Binary coded the labels then count them wrt TP/FP
coded = np.sum(test_data[1][~TP_mask] * 2 ** np.arange(num_classes)[::-1], axis=1).astype(int)
print("binary coded class error count:", dict(sorted(Counter(coded).items(), key=lambda x: x[0])))
coded = np.sum(test_data[1][TP_mask] * 2 ** np.arange(num_classes)[::-1], axis=1).astype(int)
print("binary coded class correct count:", dict(sorted(Counter(coded).items(), key=lambda x: x[0])))
print(Y_pred[~TP_mask])

# Save model (pretty important!)

In [None]:
# Print the element type of every recorded metric in `history` (assigned by the
# learning-rate search cell), e.g. to check what the JSON encoder will have to handle.
for key, val in history.history.items():
    print(key, type(val[0]))

In [None]:

    
# # Fill in the relevant params below    
# model_def_path = os.path.join(log_dir, "model_def.json")
# model_weights_path = os.path.join(log_dir, "model_weights.h5")
# model_info_path = os.path.join(log_dir, "model.json")
# model_plot_path = os.path.join(log_dir, "precision-recall.png")
# (Y_true, Y_pred, TP, test_metrics) = evaluate_model(model)
# display_performance(Y_true, Y_pred, TP)
# save_model(
#     model, name=experiment_name,
#     class_map_r=caption_map_r, prediction_type=caption_type,
#     model_weights_path=model_weights_path, model_def_path=model_def_path, model_info_path=model_info_path,
#     test_metrics=test_metrics, history=history,
#     description="Model for detecting whether camera is forwards or sidewards facing in a pipe."
# )

# Learning Curve

In [None]:
def learning_curve(dataset, lr, steps, val_data, log_dir):
    """Train on growing subsets of `dataset` and return the training histories.

    The image ids are shuffled once; subset sizes run from 0 to the full
    dataset in `steps` equal increments. Size 0 only creates and saves the
    base weights; every later subset starts from those base weights, trains
    for 50 epochs with learning rate `lr` and saves its own weights.

    Parameters
    ----------
    dataset : dataset object exposing `imgs`, `generator` and `num_images`.
    lr : learning rate applied to the optimizer before each training run.
    steps : number of non-empty subsets.
    val_data : validation arrays passed to `fit_generator`.
    log_dir : root directory for weights and TensorBoard logs.

    Returns
    -------
    dict mapping subset size -> keras History.
    """
    # Imported locally: the notebook-level InceptionV3 import is commented out,
    # so the name would otherwise be undefined when this function runs.
    from keras.applications.inception_v3 import InceptionV3

    def save_model(path):
        print("Saving", path)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        model.save_weights(path)
    def setup_callbacks():
        return [
                ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, cooldown=5, verbose=1),
                ModelCheckpoint(
                    model_best_path, monitor='val_loss', verbose=1,
                    save_best_only=True, save_weights_only=True, mode='auto', period=1),
#                 ModelCheckpoint(
#                     best_path, monitor='val_loss', verbose=1,
#                     save_best_only=False, save_weights_only=True, mode='auto', period=50),
                PRTensorBoard(
                    log_dir=model_log_dir,
                    histogram_freq=0,
                    batch_size=batch_size,
                    write_graph=False,
                    write_grads=False,
                    write_images=False),
        #         EarlyStopping(
        #             monitor='val_loss', min_delta=0.0, patience=40, verbose=1, mode='auto')
        ]
    def create_new_model(load_base=False):
        K.clear_session()
        # create_new_head() has no `class_weights` parameter; passing one raised
        # TypeError. Class weights are supplied to fit_generator() in train().
        model = create_new_head(
            InceptionV3(include_top=False, weights='imagenet', input_shape=image_dims),
            num_classes, caption_type, opt_params={'optimizer': Nadam()},
            train_features=False, l2_reg=None)
        if load_base:
            print("Loading base model")
            model.load_weights(base_model_path, by_name=True)
        return model

    def train():
        print("Training")
        K.set_value(model.optimizer.lr, lr)
        history[subset_size] = model.fit_generator(
            batching_gen(gen, batch_size=batch_size),
            validation_data=tuple(val_data),
            steps_per_epoch=(subset_size // batch_size),
            validation_steps=steps_per_epoch_val,
            class_weight=model_class_weights,
            callbacks=setup_callbacks(),
            epochs=50,
            verbose=1)
    # The nested helpers above read these names from this enclosing scope.
    model_class_weights = None
    model = None
    image_ids = [image['id'] for image in dataset.imgs.values()]
    np.random.shuffle(image_ids)
    num_images = len(image_ids)
    print("num_images", num_images)
    history = {}
    base_model_path = os.path.join(log_dir, "base", "weights.h5")
    model_path = base_model_path
    for subset_size in np.linspace(0, num_images, steps + 1).astype(int):
        if subset_size > 0:
            imgIds = image_ids[:subset_size]
            gen = pipeline(
                dataset.generator(shuffle_ids=False, imgIds=imgIds),
                aug_config=None)
            model_class_weights = calc_class_weights(gen, dataset)

            model_path = os.path.join(log_dir, "subset-of-{:d}/weights.h5".format(subset_size))
            model_log_dir = os.path.dirname(model_path)
            model_best_path = os.path.join(log_dir, "subset-of-{:d}/best.h5".format(subset_size))
            os.makedirs(model_log_dir, exist_ok=True)

            print("learning curve(lr={:.3e}, size={:d})".format(lr, subset_size))
            print("model_log_dir", model_log_dir)
            print("training class weights")
            print(model_class_weights)
        # Size 0 creates the untrained base model; later sizes reload its weights.
        model = create_new_model(load_base=(subset_size > 0))
        if subset_size:
            train()
        save_model(model_path)
    return history

# Run the learning-curve experiment and plot final train/validation loss against subset size.
# NOTE(review): this rebinds the notebook-level `model` to None; later cells that use
# `model` need it to be re-created first.
model = None
lr = 1e-5
learning_curve_dir = "/data/log/cnn/fd/learning_curve_5--{:.2e}".format(lr)
lc_history = learning_curve(coco_train, lr, 5, val_data, learning_curve_dir)
# One (subset size, last-epoch loss) row per trained subset.
val_loss = np.array([(size, h.history['val_loss'][-1]) for size, h in lc_history.items()])
train_loss = np.array([(size, h.history['loss'][-1]) for size, h in lc_history.items()])
plt.figure()
# Blue: training loss, red: validation loss.
plt.plot(train_loss[:, 0], train_loss[:, 1], 'b.')
plt.plot(val_loss[:, 0], val_loss[:, 1], 'r.')
plt.xlabel("Number of Training Samples")
plt.ylabel("Loss")
plt.savefig(os.path.join(learning_curve_dir, "plot.png"), dpi=150)

In [None]:
# !rm -R /data/log/cnn/fd/learning-curve/

In [None]:
# Sanity check: predict one training batch and compute its cross-entropy by hand.
images = None
for images, labels in batching_gen(train_gen, batch_size=batch_size):
    print(images.shape, labels.shape)

    pred = model.predict(images)
    print(labels)
    print(pred)
    # `pred` comes out of the model's output activation, i.e. it holds
    # probabilities. tf.losses.sigmoid_cross_entropy expects raw logits and would
    # apply a second sigmoid, reporting a wrong loss; use the probability-based
    # binary cross-entropy instead (mean over all elements).
    manual_loss = K.mean(K.binary_crossentropy(
        K.constant(labels, dtype=K.floatx()),
        K.constant(pred, dtype=K.floatx())))
    print(K.eval(manual_loss))
    break

# Check Update/Weight Ratio

In [None]:
def get_gradients(model):
    """Return the trainable weights of `model` and the loss gradient for each.

    Parameters
    -----------
    model : a keras model instance

    Only weights that belong to a layer whose `trainable` flag is set are kept.
    `model.trainable_weights` alone is not trusted for this, hence the explicit
    per-layer check. (The earlier filter, `if model.trainable_weights`, tested
    the whole list and therefore never excluded anything.)

    Returns
    -------
    (weights, gradients) : the filtered weight tensors, in
        `model.trainable_weights` order, and
        `optimizer.get_gradients(model.total_loss, weights)`.
    """
    # Compare by identity: tensor objects may not support == / hashing sensibly.
    trainable_ids = {
        id(tensor)
        for layer in model.layers
        if layer.trainable
        for tensor in layer.trainable_weights
    }
    weights = [tensor for tensor in model.trainable_weights
               if id(tensor) in trainable_ids]
    optimizer = model.optimizer

    return weights, optimizer.get_gradients(model.total_loss, weights)

# Print the gradient-norm / weight-norm ratio of every trainable weight for the
# batch (`images`, `labels`) produced by the previous cell.
# K.set_value(model.optimizer.lr, 1e-3)
# model.fit(images, labels, batch_size=10)
weights, grads = get_gradients(model)
feed_dict = {
    # One sample weight per image in the batch; a hard-coded length of 2 does
    # not match the batch fed through the other two placeholders.
    "class_logits_sample_weights:0": np.ones(len(images)),
    "input_1:0": images,
    "class_logits_target:0": labels
}

session = K.get_session()
for i, (w, g) in enumerate(zip(weights, grads)):
    grad_norm = np.linalg.norm(g.eval(feed_dict, session))
    weight_norm = np.linalg.norm(w.eval(session))
    rate = grad_norm / weight_norm
    print(i, rate)


# Visualize filters

In [None]:
from herbicide.utils import vis_square

# Find the first trainable 4-D kernel in the model and display its filters.
# Searching explicitly avoids plotting an unbound or non-4-D `value` when the
# first trainable layer has no such kernel.
value = None
for layer in model.layers:
    for weight in layer.trainable_weights: #  Assumes FD is not trainable
        if 'kernel' not in weight.name:
            continue
        print(weight.name)
        candidate = K.eval(weight.value())
        print(candidate.shape)
        if candidate.ndim == 4:
            value = candidate
            break
    if value is not None:
        break

if value is None:
    print("No trainable 4-D kernel found; nothing to visualise")
else:
    plt.figure()
    # Move the last kernel axis to the front so vis_square gets one tile per filter.
    vis_square(value.transpose((3, 0, 1, 2)))
