# TensorFlow/Keras Pre-processing

### We can easily load the Kaggle images from their local folders using Keras' `ImageDataGenerator`.
### We then apply data augmentation to the training images and rescale all pixel values by 1/255.

In [7]:
from keras.preprocessing.image import ImageDataGenerator

test_dir = "Testing/"
train_dir = "Training/"

# Training-time pipeline: rescale pixels by 1/255 and apply random geometric
# augmentation (rotation, zoom, shifts, shear, horizontal flips).
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

# Evaluation pipeline: rescaling only, so validation metrics and predictions are
# computed on the unaugmented images and are repeatable from pass to pass.
testgen = ImageDataGenerator(rescale=1./255)

train_generator = datagen.flow_from_directory(
    directory=train_dir,
    target_size=(150, 150),
    color_mode="grayscale",
    batch_size=64,
    class_mode="categorical",
    shuffle=True,
    seed=7
)

# Held-out images must go through `testgen`, not the augmenting `datagen`.
# shuffle=False keeps the batch order fixed so predictions line up with
# `test_generator.classes`.
test_generator = testgen.flow_from_directory(
    directory=test_dir,
    target_size=(150, 150),
    color_mode="grayscale",
    batch_size=64,
    class_mode="categorical",
    shuffle=False,
)

Found 2870 images belonging to 4 classes.
Found 394 images belonging to 4 classes.


# Setting Up the Model

In [8]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.models import Sequential
from keras.regularizers import l2

from sklearn.utils import compute_sample_weight

def train_model(train_generator, test_generator, epochs=10):
    """Build, compile and fit one CNN classifier.

    Architecture: a 5x5 conv block on the 150x150x1 input, then seven 3x3 conv
    blocks (32 -> 256 filters, each one except the 32-filter block followed by
    max pooling), two L2-regularised dense layers with dropout, and a 4-way
    softmax output.

    Args:
        train_generator: Batch source used for fitting.
        test_generator: Batch source passed to ``fit`` as ``validation_data``;
            its ``val_loss`` drives early stopping and the learning-rate
            schedule.
        epochs: Maximum number of training epochs. Defaults to 10.

    Returns:
        The fitted ``Sequential`` model.
    """
    model = Sequential()

    # Stem: 5x5 conv block that also declares the grayscale input shape.
    add_conv_block(model, 32, (5, 5), input_shape=(150, 150, 1))

    # Seven 3x3 conv blocks. Every block except the 32-filter one is followed by
    # max pooling, so the feature map is downsampled six times in total.
    for filters in [32, 64, 64, 128, 128, 256, 256]:
        add_conv_block(model, filters, (3, 3))
        if filters != 32:
            model.add(MaxPooling2D(padding="same"))

    # Flatten the feature maps for the dense head.
    model.add(Flatten())

    # Fully connected layers with L2 regularization and dropout.
    model.add(Dense(1024, activation="relu", kernel_regularizer=l2(0.0001)))
    model.add(Dropout(0.5))

    model.add(Dense(128, activation="relu", kernel_regularizer=l2(0.0001)))
    model.add(Dropout(0.5))

    # Output layer: one probability per class.
    model.add(Dense(4, activation="softmax"))

    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy", "AUC"])

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    # min_lr has to sit below the optimizer's starting rate (Adam defaults to
    # 0.001); a floor equal to the starting rate would make this callback a no-op.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)
    callbacks = [early_stopping, reduce_lr]

    model.fit(
        train_generator,
        epochs=epochs,
        validation_data=test_generator,
        callbacks=callbacks,
        shuffle=True,
    )

    return model

def add_conv_block(model, filters, kernel_size, padding="same", input_shape=None, reg_lambda=0.01):
    """Append a Conv2D -> BatchNormalization -> ReLU stack to ``model`` in place.

    Args:
        model: Model exposing ``add``; the three layers are appended to it.
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size.
        padding: Padding mode forwarded to ``Conv2D``.
        input_shape: When truthy, forwarded to ``Conv2D`` as ``input_shape``
            (used for the first layer of a model); otherwise omitted.
        reg_lambda: L2 penalty factor applied to the convolution kernel.
    """
    # Collect the Conv2D keyword arguments once; only the first block of a
    # model carries an explicit input shape.
    conv_options = {"padding": padding}
    if input_shape:
        conv_options["input_shape"] = input_shape
    conv_options["kernel_regularizer"] = l2(reg_lambda)

    model.add(Conv2D(filters, kernel_size, **conv_options))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

# Number of ensemble members to train.
N = 5

# Each call builds and fits a fresh model on the same train/validation generators.
models = list(
    train_model(train_generator, test_generator)
    for _member in range(N)
)

Epoch 1/10
Epoch 2/10
 8/45 [====>.........................] - ETA: 1:13 - loss: 7.1605 - accuracy: 0.3887 - auc: 0.6925

KeyboardInterrupt: 

In [None]:
import numpy as np

def ensemble_predictions(models, generator):
    """Average the members' predictions and return the winning class per sample.

    Args:
        models: Iterable of objects exposing ``predict(generator)``; each call
            is expected to return per-sample class scores of the same shape.
        generator: Input passed unchanged to every ``predict`` call.

    Returns:
        1-D array of class indices (argmax over axis 1 of the mean scores).
    """
    member_scores = []
    for member in models:
        member_scores.append(member.predict(generator))

    # Soft voting: mean over the model axis, then pick the top class per row.
    mean_scores = np.mean(member_scores, axis=0)
    winning_class = np.argmax(mean_scores, axis=1)
    return winning_class

# Predicted class index per test image, taken from the averaged ensemble output.
ensemble_val_preds = ensemble_predictions(models, test_generator)

# Evaluate ensemble performance

In [None]:
from sklearn.metrics import accuracy_score
# `ensemble_predictions` already returns 1-D class indices, so no further
# argmax is needed (argmax over axis=1 of a 1-D array raises AxisError).
ensemble_val_labels = ensemble_val_preds

# Ground-truth class indices in directory order; they line up with the
# predictions because `test_generator` was created with shuffle=False.
y_val_true = test_generator.classes

accuracy = accuracy_score(y_val_true, ensemble_val_labels)
print(f'Ensemble Accuracy: {accuracy}')