In [1]:
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.xception import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

import numpy as np
import os


In [2]:
wd = os.path.sep.join([os.getcwd(), "data"])
train_path = os.path.sep.join([wd, "train"])
test_path = os.path.sep.join([wd, "test"])

## Declaration of the image generators used to train models without having to load images in cache

In [11]:
epochs = 50
callback = lambda x: [EarlyStopping(monitor='val_loss', patience=2, mode="min"), ModelCheckpoint(filepath=f"{wd}{x}_model.h5", monitor='val_loss', save_best_only=True)]
totalTrain = 166730
totalVal = 25009
totalTest = ...

In [3]:
img_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=.15)
training_gen = img_datagen.flow_from_directory(train_path, target_size=(51,51), subset="training", color_mode='rgb', batch_size=32, shuffle=True)
validation_gen = img_datagen.flow_from_directory(train_path, target_size=(51,51), subset="validation", color_mode='rgb', batch_size=32, shuffle=True)
test_gen = img_datagen.flow_from_directory(test_path, target_size=(51,51), color_mode='rgb', batch_size=32, shuffle=True)

Found 141722 images belonging to 2 classes.
Found 25008 images belonging to 2 classes.
Found 110794 images belonging to 2 classes.


## I - CNN model with unbalanced classes (without data augmentation)

In [4]:
from sklearn.utils import class_weight

class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(training_gen.classes), y=training_gen.classes)

train_class_weights = dict(enumerate(class_weights))
train_class_weights

{0: 0.6984751259228593, 1: 1.7596036850338954}

In [14]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(32, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(1, activation="sigmoid"))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x=training_gen, validation_data=validation_gen, class_weight=train_class_weights, callbacks=callback("CNN_unbalanced"),
          batch_size=64, steps_per_epoch=totalTrain // 64, validation_steps=totalVal // 64,)


 486/2605 [====>.........................] - ETA: 7:05 - loss: 0.6901 - accuracy: 0.5000

KeyboardInterrupt: 

## II - CNN model with balanced classes data augmentation