In [2]:
# Standard library imports
import os

# Third-party libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from skimage.transform import rotate, AffineTransform, warp, resize
import tensorflow.keras.backend as K

# Keras and TensorFlow applications
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.efficientnet_v2 import EfficientNetV2S
from tensorflow.keras.applications.resnet50 import preprocess_input

# Keras models and layers
from keras.models import Model, Sequential
from keras.layers import Dense, Input, Concatenate
from keras.optimizers import Adam
from keras.metrics import Precision, Recall
from keras.utils import Sequence

# Keras image preprocessing
from keras.preprocessing.image import load_img, img_to_array

1 GLOBAL VARIABLES:

In [3]:
# Image dimensions and input shape for models
IMG_HEIGHT = 224
IMG_WIDTH = 224
MODELS_INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)

# Prefix for identifying layers in the Places-365 model for Early Fusion
PLACES_PREFIX = 'places'

# Data-related constants
VALIDATION_DATASET_SIZE = 0.2  # Proportion of dataset used for validation
DATA_LOCATION = ''
DATA_IMAGES_LOCATION = DATA_LOCATION + 'images/'
TARGET_VARIABLE_NAME = 'T1'
IMAGE_FILENAME_COLUMN = 'id'


# Training hyperparameters
LEARNING_RATE = 0.001
HIDDEN_LAYERS = [256, 128, 64, 32, 16]  # Dense layer sizes for feature fusion
NUM_CLASSES = 2  # Number of output classes for classification
EPOCHS = 20
BATCH_SIZE = 32
LOSS_FUNCTION = 'binary_crossentropy'

2 DATA LOADING:

In [4]:
original_data = pd.read_csv(DATA_LOCATION + 'binary_dataset.csv')
#id contains the filename without the image format. Just include .jpg to obtain it
original_data['id'] = original_data[IMAGE_FILENAME_COLUMN].apply(lambda x: DATA_LOCATION + 'images/' + x + '.jpg')

In [5]:
def return_datasets(data_df):
    # Create empty DataFrames with the same columns as the input data
    train_df = pd.DataFrame(columns=data_df.columns)
    val_df = pd.DataFrame(columns=data_df.columns)

    # Generate an array of indices and split them into training and validation sets
    train_inds, val_inds = train_test_split(
        np.array(list(range(data_df.shape[0]))),  # Array of row indices for data_df
        test_size=VALIDATION_DATASET_SIZE,        # Fraction of data to be used for validation
        random_state=42                           # Ensures reproducibility of the split
    )

    # Assign rows corresponding to the training indices to train_df and reset the index
    train_df = data_df.iloc[train_inds, :].reset_index(drop=True)

    # Assign rows corresponding to the validation indices to val_df and reset the index
    val_df = data_df.iloc[val_inds, :].reset_index(drop=True)

    # Return the training and validation DataFrames
    return train_df, val_df

train_df, val_df = return_datasets(original_data)

2.2 DEALING WITH IMBALANCED DATA

In [6]:
value_counts = train_df[TARGET_VARIABLE_NAME].value_counts()

# Find the minimum count of any value in the column
min_count = value_counts.min()

# Create a balanced DataFrame
balanced_df = pd.concat([
    train_df[train_df[TARGET_VARIABLE_NAME] == value].sample(min_count, random_state=42)
        for value in value_counts.index
    ])

#Shuffle the dataset to mix the labels
balanced_df = balanced_df.sample(n=len(balanced_df))
train_df = balanced_df.reset_index(drop=True)

2.3 DATAGENERATORKERAS CLASS:

In [7]:
class DataGeneratorKeras(Sequence):
    """
    Keras Data Generator for loading and preprocessing image datasets.

    This generator loads images from a dataset, applies optional augmentation and preprocessing,
    and returns batches of images along with their corresponding labels.

    Attributes:
        dataset (pd.DataFrame): DataFrame containing image file paths and target labels.
        batch_size (int): Number of samples per batch.
        image_shape (tuple): Shape of the input images (height, width, channels).
        filename_column (str): Column name containing image file paths.
        target_column (list): Column(s) containing target labels.
        augmentation (bool): Whether to apply data augmentation.
        preprocessing_fn (callable): Function to apply preprocessing to images.
    """

    def __init__(self, dataset, augmentation=False, preprocessing_fn=None,
                 filename_column='id', target_column=['T1'], image_shape=(224, 224, 3), batch_size=16):
        """
        Initializes the data generator.

        Args:
            dataset (pd.DataFrame): DataFrame containing image paths and target labels.
            augmentation (bool, optional): Whether to apply data augmentation. Defaults to False.
            preprocessing_fn (callable, optional): Function for additional image preprocessing. Defaults to None.
            filename_column (str, optional): Column name with image file paths. Defaults to 'id'.
            target_column (list, optional): List of column names containing labels. Defaults to ['T1'].
            image_shape (tuple, optional): Shape of the input images. Defaults to (224, 224, 3).
            batch_size (int, optional): Number of samples per batch. Defaults to 16.
        """
        super().__init__()
        self.dataset = dataset
        self.batch_size = batch_size
        self.image_shape = image_shape
        self.filename_column = filename_column
        self.target_column = target_column
        self.augmentation = augmentation
        self.preprocessing_fn = preprocessing_fn

    def __len__(self):
        """
        Computes the number of batches per epoch.

        Returns:
            int: Number of batches per epoch.
        """
        return len(self.dataset) // self.batch_size

    def on_epoch_end(self):
        """
        Shuffles the dataset at the end of each epoch to improve training variability.
        """
        self.dataset = self.dataset.sample(frac=1).reset_index(drop=True)

    def __getitem__(self, idx):
        """
        Generates a batch of data.

        Args:
            idx (int): Index of the batch.

        Returns:
            tuple: ((images, images), labels), where images are preprocessed input data,
                   and labels are the corresponding target values.
        """
        images = np.empty((self.batch_size, *self.image_shape), dtype=np.float32)
        labels = np.empty((self.batch_size, len(self.target_column)), dtype=np.float32)

        for i in range(self.batch_size):
            img_index = idx * self.batch_size + i
            image_path = self.dataset.iloc[img_index][self.filename_column]
            image = img_to_array(load_img(image_path, target_size=self.image_shape[:2]))

            # Apply optional augmentation
            if self.augmentation:
                image = rotate(image, np.random.uniform(-30, 30), preserve_range=True)
                if np.random.choice([True, False]):  # Random horizontal flip
                    image = np.flip(image, axis=1)

            # Apply optional preprocessing function
            if self.preprocessing_fn:
                image = self.preprocessing_fn(image)

            images[i] = image
            labels[i] = self.dataset.iloc[img_index][self.target_column].values.astype(np.float32)

        return (images, images), labels


2.4 GENERATE THE DATAGENERATOR TO BE USED IN THE MODEL TRAINING:

In [8]:
train_datagen = DataGeneratorKeras(dataset = train_df, augmentation = True, preprocessing_fn = preprocess_input, batch_size = 32)
valid_datagen = DataGeneratorKeras(dataset = val_df, augmentation = False, preprocessing_fn = preprocess_input, batch_size = 32)

3 PLACES365 CLASS:

In [1]:
from keras import backend as K
from keras.layers import Input, Dense, Flatten, Dropout, MaxPooling2D, Conv2D
from keras.models import Model
from keras.regularizers import l2
from keras.utils import get_file

WEIGHTS_PATH = 'https://github.com/GKalliatakis/Keras-VGG16-places365/releases/download/v1.0/vgg16-places365_weights_tf_dim_ordering_tf_kernels.h5'
L2_PARAMETER = 0.0002


def conv_block(x, filters, convs, prefix):
    """
    Creates a convolutional block with a given number of convolutional layers.

    Args:
        x (tensor): Input tensor.
        filters (int): Number of filters for each convolutional layer.
        convs (int): Number of convolutional layers in the block.
        prefix (str): Prefix for layer names.

    Returns:
        tensor: Output tensor after applying convolutions and max pooling.
    """
    for i in range(convs):
        x = Conv2D(filters, kernel_size=3, padding='same',
                   kernel_regularizer=l2(L2_PARAMETER), activation='relu',
                   name=f'{prefix}_conv{i+1}')(x)
    return MaxPooling2D(pool_size=(2, 2), name=f'{prefix}_pool')(x)

def VGG16_Places365(weights='places', input_shape=None, prefix=''):

    classes = 365

    """
    Builds the VGG16 model for Places365 classification.

    Args:
        include_top (bool): Whether to include the fully connected layers.
        weights (str): Type of pre-trained weights to use ('places' for Places365).
        input_shape (tuple): Shape of the input images.
        prefix (str): Prefix for layer names.

    Returns:
        Model: Keras model instance of VGG16-Places365.
    """
    input_tensor = Input(shape=input_shape)

    # Define the convolutional blocks
    x = conv_block(input_tensor, filters=64, convs=2, prefix=f'{prefix}block1')
    x = conv_block(x, filters=128, convs=2, prefix=f'{prefix}block2')
    x = conv_block(x, filters=256, convs=3, prefix=f'{prefix}block3')
    x = conv_block(x, filters=512, convs=3, prefix=f'{prefix}block4')
    x = conv_block(x, filters=512, convs=3, prefix=f'{prefix}block5')

    # Fully connected layers
    x = Flatten(name=f'{prefix}flatten')(x)
    x = Dense(4096, activation='relu', name=f'{prefix}fc1')(x)
    x = Dropout(0.5, name=f'{prefix}drop_fc1')(x)
    x = Dense(4096, activation='relu', name=f'{prefix}fc2')(x)
    x = Dropout(0.5, name=f'{prefix}drop_fc2')(x)
    x = Dense(classes, activation='softmax', name=f'{prefix}predictions')(x)

    # Create model
    model = Model(input_tensor, x, name='vgg16-places365')

    # Load weights only if requested
    if weights == 'places':
        weights_path = get_file('vgg16-places365_weights_tf_dim_ordering_tf_kernels.h5',
                                WEIGHTS_PATH, cache_subdir='models')
        model.load_weights(weights_path)

    return model


4 EARLY-FUSION MODEL GENERATION:

In [10]:
def create_places_365_model():
    places365_model = VGG16_Places365(weights='places', prefix=PLACES_PREFIX, input_shape = (MODELS_INPUT_SHAPE))
    places365_model.trainable = False
    return places365_model


def create_earlyfusion_model(cnn_model, model_layer):
    cnn_model. trainable = False
    places365_model = create_places_365_model()
    model_365_features = places365_model.get_layer(f'{PLACES_PREFIX}fc2').output
    cnn_model_features = cnn_model.get_layer(model_layer).output

    x = Concatenate()([model_365_features, cnn_model_features])
    for units in [256, 128, 64, 32, 16]:
        x = Dense(units=units, activation='relu')(x)

    x = Dense(units=1, activation='sigmoid')(x)

    model = Model(inputs=[places365_model.input, cnn_model.input], outputs=x, name='early-fusion')

    return model

In [11]:
input_tensor = Input(shape=MODELS_INPUT_SHAPE)

models = {
    #'vgg19_binary.keras': (VGG19(weights='imagenet', input_tensor=input_tensor), 'fc2')
    'inceptionv3_binary.keras': (InceptionV3(weights='imagenet', input_tensor=input_tensor), 'avg_pool')
    #'resnet50_binary.keras': (ResNet50(weights='imagenet', input_tensor=input_tensor), 'avg_pool'),
    #'efficientnetv2s_binary.keras': (EfficientNetV2S(weights='imagenet', input_tensor=input_tensor), 'avg_pool')
}

model_to_use = 'inceptionv3_binary.keras'

early_fusion_model = create_earlyfusion_model(models[model_to_use][0], models[model_to_use][1])


early_fusion_model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=LEARNING_RATE),
        metrics=['accuracy', Precision(), Recall()]
    )


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5
[1m96112376/96112376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step
Downloading data from https://github.com/GKalliatakis/Keras-VGG16-places365/releases/download/v1.0/vgg16-places365_weights_tf_dim_ordering_tf_kernels.h5
[1m543085444/543085444[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 0us/step


5.1 MODEL TRAINING:

In [None]:
history = early_fusion_model.fit(
    x = train_datagen,
    epochs = EPOCHS,
    validation_data = valid_datagen
)

Epoch 1/20


Expected: ['keras_tensor_313', ['keras_tensor']]
Received: inputs=('Tensor(shape=(None, 224, 224, 3))', 'Tensor(shape=(None, 224, 224, 3))')


[1m15/50[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m16:41[0m 29s/step - accuracy: 0.5484 - loss: 1.0859 - precision: 0.5693 - recall: 0.6563