In [None]:
import ast
import datetime
import talos
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from pathlib import Path
from talos.utils import live, early_stopper
from talos.utils.best_model import activate_model
from keras import backend as K
from keras import optimizers, losses
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from keras.layers import (Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D)
from keras.models import Sequential
from keras_preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet import ResNet50, ResNet101, ResNet152
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, ResNet101V2, ResNet152V2
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, classification_report, roc_curve, f1_score, accuracy_score
from sklearn.preprocessing import MultiLabelBinarizer

# Report how many GPU devices TensorFlow can see in this environment.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


## Some helper functions

In [None]:
def plot_history(history, run):
    """Plot the training curves of one run, save them as a PDF and show them.

    Args:
        history: Object exposing a ``history`` dict with the per-epoch series
            'loss', 'val_loss', 'f1score', 'val_f1score', 'acc' and 'val_acc'
            (for example the value returned by ``model.fit``).
        run: Identifier of the run; used as prefix of the output file name.

    Raises:
        KeyError: if one of the expected series is missing from ``history``.

    The figure is written to ``plots/<run>_<YYYYmmdd-HHMMSS>.pdf``.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    plot_path = Path("plots") / f"{run}_{timestamp}.pdf"
    # savefig does not create missing directories, so make sure the target exists.
    plot_path.parent.mkdir(parents=True, exist_ok=True)

    # The first pair is whatever loss the model was compiled with, hence the
    # neutral "loss" label.
    curves = [
        ('loss', 'Train loss'),
        ('val_loss', 'Val loss'),
        ('f1score', 'Train f1'),
        ('val_f1score', 'Val f1'),
        ('acc', 'Train Acc'),
        ('val_acc', 'Val Acc'),
    ]

    fig, ax = plt.subplots()
    for key, label in curves:
        ax.plot(history.history[key], label=label)
    ax.set_title('Loss, F1 and Accuracy for CNN')
    ax.set_ylabel('Value')
    ax.set_xlabel('No. epoch')
    ax.legend(loc="upper left")
    fig.savefig(plot_path, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls (one per scan round) do not pile up.
    plt.close(fig)

In [None]:
def order_label_pivot(ds):
    """Count the samples per (order, labels) combination.

    Args:
        ds: DataFrame with at least the columns 'labels' and 'order'.

    Returns:
        DataFrame indexed by 'order' with one column per distinct label value
        (labels are compared by their string form) holding the sample counts,
        plus a final 'Total' row with the per-column sums.

    The input frame is not modified.
    """
    # Work on a copy: casting ds.labels in place would silently turn the
    # caller's label values into strings.
    ds_balance = ds[['labels', 'order']].copy()
    ds_balance['labels'] = ds_balance['labels'].astype(str)
    ds_balance = ds_balance.pivot_table(index="order", columns="labels", aggfunc=len, fill_value=0)
    ds_balance.loc["Total"] = ds_balance.sum()
    return ds_balance

In [None]:
def make_generators(train, val, test, ds_dir, hp):
    """Create the train / validation / test image iterators.

    Args:
        train, val, test: DataFrames with the columns 'filename' and 'labels'.
        ds_dir: Directory the file names are resolved against.
        hp: Hyper-parameter dict. Reads 'rotation', 'width_shift',
            'height_shift', 'model_name_size', 'batch_size' and 'color_mode'.

    Returns:
        Tuple ``(train_gen, val_gen, test_gen)``. Only the training iterator
        augments its images; the test iterator is created with shuffle=False.
    """
    def orthogonal_rot(image):
        # Rotate by -90, 0 or +90 degrees, picked at random per image.
        return np.rot90(image, np.random.choice([-1, 0, 1]))

    augmenting = ImageDataGenerator(
        rescale=1./255.,
        preprocessing_function=orthogonal_rot if hp['rotation'] else None,
        width_shift_range=hp['width_shift'],
        height_shift_range=hp['height_shift'],
    )
    plain = ImageDataGenerator(rescale=1./255.)

    # Settings shared by all three iterators; target_size is the (height,
    # width) part of the model's input shape.
    flow_kwargs = dict(
        directory=ds_dir,
        x_col='filename',
        y_col='labels',
        class_mode='categorical',
        target_size=hp['model_name_size'][1][:2],
        batch_size=hp['batch_size'],
        color_mode=hp['color_mode'],
    )

    train_gen = augmenting.flow_from_dataframe(train, **flow_kwargs)
    val_gen = plain.flow_from_dataframe(val, **flow_kwargs)
    test_gen = plain.flow_from_dataframe(test, shuffle=False, **flow_kwargs)
    return train_gen, val_gen, test_gen

## Model creation functions

In [None]:
def make_resnet(hp):
    """Build the ResNet backbone selected by ``hp['model_name_size'][0]``.

    Args:
        hp: Hyper-parameter dict. Reads
            'train_layers'    - truthy: start from random weights,
                                falsy: load the ImageNet weights;
            'model_name_size' - ``[name, input_shape]``; name is one of
                                'resnet50', 'resnet101', 'resnet152',
                                'resnet50v2', 'resnet101v2', 'resnet152v2';
            'pooling'         - forwarded to the Keras constructor.

    Returns:
        Tuple ``(base_model, preprocess_input)``: the headless backbone and
        the ``preprocess_input`` function of the matching Keras module.

    Raises:
        ValueError: if the model name is not a supported ResNet variant.
    """
    weights = None if hp['train_layers'] else 'imagenet'
    model_name = hp['model_name_size'][0]

    if model_name in ('resnet50', 'resnet101', 'resnet152'):
        preprocess_input = tf.keras.applications.resnet.preprocess_input
        architecture = {
            'resnet50': ResNet50,
            'resnet101': ResNet101,
            'resnet152': ResNet152,
        }[model_name]
    elif model_name in ('resnet50v2', 'resnet101v2', 'resnet152v2'):
        preprocess_input = tf.keras.applications.resnet_v2.preprocess_input
        architecture = {
            'resnet50v2': ResNet50V2,
            'resnet101v2': ResNet101V2,
            'resnet152v2': ResNet152V2,
        }[model_name]
    else:
        # Fail with a clear message instead of an UnboundLocalError at return.
        raise ValueError(f"Unsupported ResNet variant: {model_name!r}")

    base_model = architecture(
        include_top=False,
        weights=weights,
        input_shape=hp['model_name_size'][1],
        pooling=hp['pooling'],
        classes=2,  # kept from the original call; the top is excluded here
    )
    return base_model, preprocess_input
    
def make_vgg16(hp):
    """Build a headless VGG16 backbone.

    Args:
        hp: Hyper-parameter dict. Reads 'train_layers' (truthy: random
            initial weights, falsy: ImageNet weights), 'model_name_size'
            (its second item is the input shape) and 'pooling'.

    Returns:
        Tuple ``(base_model, preprocess_input)``.
    """
    preprocess_input = tf.keras.applications.vgg16.preprocess_input
    backbone_kwargs = dict(
        include_top=False,
        weights=None if hp['train_layers'] else 'imagenet',
        input_shape=hp['model_name_size'][1],
        pooling=hp['pooling'],
        classes=2,
    )
    return VGG16(**backbone_kwargs), preprocess_input

def make_vgg19(hp):
    """Build a headless VGG19 backbone.

    Args:
        hp: Hyper-parameter dict. Reads 'train_layers' (truthy: random
            initial weights, falsy: ImageNet weights), 'model_name_size'
            (its second item is the input shape) and 'pooling'.

    Returns:
        Tuple ``(base_model, preprocess_input)``.
    """
    preprocess_input = tf.keras.applications.vgg19.preprocess_input
    backbone_kwargs = dict(
        include_top=False,
        weights=None if hp['train_layers'] else 'imagenet',
        input_shape=hp['model_name_size'][1],
        pooling=hp['pooling'],
        classes=2,
    )
    return VGG19(**backbone_kwargs), preprocess_input

def make_model(hp):
    """Assemble and compile the classifier described by ``hp``.

    A backbone chosen by ``hp['model_name_size'][0]`` is followed by a
    Flatten layer, one ReLU Dense layer of ``hp['dense']`` units and a
    two-unit output layer with the ``hp['activation']`` activation.

    Args:
        hp: Hyper-parameter dict. Reads 'model_name_size', 'train_layers',
            'dense', 'activation' and 'lr' here; the backbone builders read
            further keys.

    Returns:
        The compiled Keras model (Adam, binary cross-entropy, metrics
        accuracy and talos f1score).

    Raises:
        ValueError: if the model name is not one of the supported backbones.
    """
    model_name = hp['model_name_size'][0]
    if model_name in ['resnet50', 'resnet50v2', 'resnet152', 'resnet152v2']:
        base_model, preprocess_input = make_resnet(hp)
    elif model_name == 'vgg16':
        base_model, preprocess_input = make_vgg16(hp)
    elif model_name == 'vgg19':
        base_model, preprocess_input = make_vgg19(hp)
    else:
        # Without this branch an unknown name surfaced as an UnboundLocalError.
        raise ValueError(f"Unsupported model name: {model_name!r}")
    # NOTE(review): preprocess_input is not applied anywhere; make_generators
    # rescales the images by 1/255 instead. Confirm this is intended when the
    # ImageNet weights are used.

    # Frozen backbone: only the new head is trained.
    if not hp['train_layers']:
        base_model.trainable = False

    x = Flatten()(base_model.output)
    x = Dense(hp['dense'], activation='relu')(x)

    predictions = Dense(2, activation=hp["activation"])(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # NOTE(review): the search space defines a 'loss_function' entry, but the
    # loss is fixed here and that entry is never read.
    model.compile(
        optimizer=optimizers.Adam(learning_rate=hp['lr']),
        loss='binary_crossentropy',
        metrics=["acc", talos.utils.metrics.f1score])
    # https://machinelearningmastery.com/how-to-choose-loss-functions-when-training-deep-learning-neural-networks/

    return model

## Run the model

In [None]:
def gen_dataset(DF_PATH, hp, undersample):
    """Load the image log and split it into train / validation / test frames.

    Args:
        DF_PATH: Path of a CSV file with the columns 'filename', 'labels' and
            'order'. 'labels' holds Python literals, which are parsed with
            ``ast.literal_eval``.
        hp: Hyper-parameter dict; ``hp["samples"]`` is the number of rows to
            draw before splitting.
        undersample: If truthy, balance the label combinations with
            ``RandomUnderSampler`` before sampling.

    Returns:
        Tuple ``(TRAIN, VAL, TEST)`` of DataFrames holding 50 % / 25 % / 25 %
        of the sampled rows. All random steps use the seed 8.
    """
    ds = pd.read_csv(DF_PATH, usecols=['filename', 'labels', 'order'])
    ds = ds.dropna(subset=['labels'])
    ds.labels = ds.labels.apply(ast.literal_eval)
    print(f"Original: {ds.shape}")

    if undersample:
        # Seeded like the sampling and the splits below, so runs are repeatable.
        rus = RandomUnderSampler(sampling_strategy='all', random_state=8)
        ds, _ = rus.fit_resample(ds, ds.labels.astype(str))

    # DataFrame.sample raises when asked for more rows than exist (possible
    # after dropna / undersampling), so cap the request at what is available.
    n_samples = min(hp["samples"], len(ds))
    if n_samples < hp["samples"]:
        print(f"Requested {hp['samples']} samples but only {len(ds)} rows are available; using all of them.")
    ds = ds.sample(n_samples, random_state=8)

    TRAIN, val_test = train_test_split(ds, test_size=0.5, random_state=8)
    VAL, TEST = train_test_split(val_test, test_size=0.5, random_state=8)
    print(f"Train: {TRAIN.shape}\nVal: {VAL.shape}\nTest: {TEST.shape}")
    return TRAIN, VAL, TEST

In [None]:
def wrapper(x, y, x_val, y_val, hp):
    """Model function handed to ``talos.Scan``: build, train, return.

    The data arguments ``x``, ``y``, ``x_val`` and ``y_val`` are not used;
    the images are read from disk through generators instead.

    Args:
        hp: Hyper-parameter dict of the current scan round.

    Returns:
        Tuple ``(history, model)`` with the result of ``model.fit`` and the
        trained model.
    """
    dataset_dir = Path('c:/Users/flori/download/subset')
    log_csv = Path('C:/Users/flori/OneDrive/Documents/Uni/8_Master_thesis/code/subset_logs/20210518-001138.csv')

    model = make_model(hp)
    splits = gen_dataset(log_csv, hp, undersample=False)
    # The test iterator is created as well but not needed during training.
    train_gen, val_gen, _ = make_generators(*splits, dataset_dir, hp)

    # No callbacks are registered for the fit.
    fit_options = dict(
        validation_data=val_gen,
        epochs=hp['epochs'],
        verbose=1,
        callbacks=[],
        workers=8,
        max_queue_size=16,
    )
    history = model.fit(train_gen, **fit_options)
    return history, model

In [None]:
samples = 4000

# Talos search space: every key lists the candidate values to scan.
# Values that are not part of the current run are kept in the trailing comments.
params = dict(
    experiment=['transfer'],
    samples=[samples],
    color_mode=["rgb"],
    epochs=[7],
    pooling=['avg'],                             # also: 'max'
    lr=[0.001],                                  # also: 0.0001, 0.01
    batch_size=[32],                             # also: 16, 64
    rotation=[True],                             # random 90-degree rotations on/off
    width_shift=[0],                             # also: 0.1, 0.2
    height_shift=[0],                            # also: 0.1, 0.2
    activation=["sigmoid"],                      # also: "softmax"
    loss_function=["categorical_crossentropy"],  # was binary; also: "sparse_categorical_crossentropy"
    dense=[1024],                                # also: 512
    model_name_size=[
        ["resnet50", (224, 224, 3)],
        # ["resnet50v2", (224, 224, 3)],
        # ["resnet152", (224, 224, 3)],
        # ["resnet152v2", (224, 224, 3)],
        # ["vgg16", (224, 224, 3)],
        # ["vgg19", (224, 224, 3)],
    ],
    train_layers=[False],
)

# Placeholder arrays for talos.Scan; wrapper() does not use its data arguments.
dummy_x = np.empty(1)
dummy_y = np.empty(1)

# autonomio.github.io/talos/#/
scan = talos.Scan(
    x=dummy_x,
    y=dummy_y,
    model=wrapper,
    params=params,
    experiment_name='talos',
    print_params=True,
    save_weights=True,
)

In [None]:
# Parameter count of the full ResNet152V2 network, for reference.
# ResNet152V2 is already imported from tensorflow.keras in the import cell;
# re-importing the ResNet*V2 names from the standalone `keras` package here
# would rebind the names that make_resnet relies on.
resnet_model = ResNet152V2(weights='imagenet')

resnet_model.count_params()


In [None]:
# model_predict = tf.keras.models.load_model('models/model_tuned_200000.h5')
DS_DIR = Path('c:/Users/flori/download/subset')
DF_PATH = Path('C:/Users/flori/OneDrive/Documents/Uni/8_Master_thesis/code/subset_logs/20210518-001138.csv')

# Rebuild the data splits for evaluating the scanned models.
# "samples" must equal the value used for the scan: gen_dataset draws that
# many rows with a fixed seed and then splits them, so a different count
# yields different splits and the "test" rows here may overlap with the rows
# the scanned models were trained on.
hp = {
    "samples": samples,
    "batch_size": 32,                              # [16, 32, 64]
    "rotation": True,                              # [True, False]
    "width_shift": 0,                              # [0, 0.1, 0.2]
    "height_shift": 0,                             # [0, 0.1, 0.2]
    "input_shape": (224,224,3),                    # [(64,64,1), (128,128,1)]
    "color_mode": "rgb",                           # ["rgb", "grayscale"]
    "model_name_size": ["resnet50", (224,224,3)]   # only the size is read by make_generators
}
train_df, val_df, test_df = gen_dataset(DF_PATH, hp, undersample=False)
train_gen, val_gen, test_gen = make_generators(train_df, val_df, test_df, DS_DIR, hp)

In [None]:
# One label per scan round, in the order the candidates are listed under
# "model_name_size" in the search space.
models = ['resnet50', 'resnet50v2', 'resnet152', 'resnet152v2', 'vgg16', 'vgg19']

# The ground truth is the same for every model, so binarize it once.
# test_gen.classes is passed as is: wrapping per-sample label lists of
# different lengths in np.array fails on recent NumPy versions.
y_true = MultiLabelBinarizer().fit_transform(test_gen.classes)

# Evaluate only rounds that exist in the scan.
# NOTE(review): assumes scan round i was trained with models[i] - verify
# against the 'model_name_size' column of scan.data.
n_rounds = min(len(models), len(scan.data))

records = []
for i in range(n_rounds):
    model_predict = activate_model(scan, i)
    # Start every prediction pass from the first batch so that the
    # predictions line up with test_gen.classes.
    test_gen.reset()
    preds = model_predict.predict(
        test_gen,
        verbose=1,
        workers=8,
        max_queue_size=16)

    # Threshold the per-class scores at 0.5.
    y_pred = np.rint(preds).astype(int)
    f1 = f1_score(y_true[:len(y_pred)], y_pred, average='samples')
    acc = accuracy_score(y_true[:len(y_pred)], y_pred)
    print(models[i], f1, acc)

    records.append({'experiment': models[i], 'test_f1score': f1, "test_accuracy": acc})

# Build the frame once; DataFrame.append was removed in pandas 2.0.
out = pd.DataFrame(records, columns=['experiment', 'test_f1score', 'test_accuracy'])
out

In [None]:
# out.round(3).to_csv('final_out_transfer.csv')