In [72]:
from google.colab import drive
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [73]:
# Standard Libraries
import os
import numpy as np
import pandas as pd

# Image Processing
import cv2
from skimage.transform import rotate, AffineTransform, warp
from keras.preprocessing.image import load_img, img_to_array

# Plotting
import matplotlib.pyplot as plt

# Deep Learning
import tensorflow as tf
from keras.models import Model
from keras.applications.densenet import DenseNet121
from keras.layers import Dense, Flatten, Input
from keras.metrics import Precision, Recall
from keras.optimizers import Adam
from keras.utils import Sequence
from tensorflow.keras.applications.resnet50 import preprocess_input

# Data Balancing and Splitting
from sklearn.model_selection import train_test_split

In [74]:
# Image dimensions and input shape for models
IMG_HEIGHT = 224
IMG_WIDTH = 224
MODELS_INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)

# Data-related constants
VALIDATION_DATASET_SIZE = 0.2  # Proportion of dataset used for validation
DATA_LOCATION = ''
DATA_IMAGES_LOCATION = DATA_LOCATION + 'images/'
TARGET_VARIABLE_NAME = 'T1'
IMAGE_FILENAME_COLUMN = 'id'
NUM_CLASSES = 0

# Training hyperparameters
LEARNING_RATE = 0.001
HIDDEN_LAYERS = [256, 128, 64, 32, 16]  # Dense layer sizes for feature fusion
EPOCHS = 20
BATCH_SIZE = 32
LOSS_FUNCTION = 'binary_crossentropy'

In [75]:
data = pd.read_csv(DATA_LOCATION + 'T2_powerlabel_dataset.csv')

In [76]:
def return_datasets(data_df):
    # Create empty DataFrames with the same columns as the input data
    train_df = pd.DataFrame(columns=data_df.columns)
    val_df = pd.DataFrame(columns=data_df.columns)

    # Generate an array of indices and split them into training and validation sets
    train_inds, val_inds = train_test_split(
        np.array(list(range(data_df.shape[0]))),  # Array of row indices for data_df
        test_size=VALIDATION_DATASET_SIZE,        # Fraction of data to be used for validation
        random_state=42                           # Ensures reproducibility of the split
    )

    # Assign rows corresponding to the training indices to train_df and reset the index
    train_df = data_df.iloc[train_inds, :].reset_index(drop=True)

    # Assign rows corresponding to the validation indices to val_df and reset the index
    val_df = data_df.iloc[val_inds, :].reset_index(drop=True)

    # Return the training and validation DataFrames
    return train_df, val_df

train_df, val_df = return_datasets(data)

#Oversampling:

powercount = {}
powerlabels = np.unique(train_df['powerlabel'])
for p in powerlabels:
    powercount[p] = np.count_nonzero(train_df['powerlabel']==p)

maxcount = np.max(list(powercount.values()))
for p in powerlabels:
    gapnum = maxcount - powercount[p]
    temp_df = train_df.iloc[np.random.choice(np.where(train_df['powerlabel']==p)[0],size=gapnum)]
    train_df = pd.concat([train_df, temp_df], ignore_index=True)

train_df = train_df.sample(frac=1).reset_index(drop=True)
train_df.drop(columns=['powerlabel'], inplace=True)

In [77]:
class_names = list(data.columns)[1:-1]
NUM_CLASSES = len(class_names)

In [78]:
class DataGeneratorKeras(Sequence):
    """
    Keras Data Generator for loading and preprocessing image datasets.

    This generator loads images from a dataset, applies optional augmentation and preprocessing,
    and returns batches of images along with their corresponding labels.

    Attributes:
        dataset (pd.DataFrame): DataFrame containing image file paths and target labels.
        batch_size (int): Number of samples per batch.
        image_shape (tuple): Shape of the input images (height, width, channels).
        filename_column (str): Column name containing image file paths.
        target_column (list): Column(s) containing target labels.
        augmentation (bool): Whether to apply data augmentation.
        preprocessing_fn (callable): Function to apply preprocessing to images.
    """

    def __init__(self, dataset, augmentation=False, preprocessing_fn=None,
                 filename_column='id', target_column=['T1'], image_shape=(224, 224, 3), batch_size=16):
        """
        Initializes the data generator.

        Args:
            dataset (pd.DataFrame): DataFrame containing image paths and target labels.
            augmentation (bool, optional): Whether to apply data augmentation. Defaults to False.
            preprocessing_fn (callable, optional): Function for additional image preprocessing. Defaults to None.
            filename_column (str, optional): Column name with image file paths. Defaults to 'id'.
            target_column (list, optional): List of column names containing labels. Defaults to ['T1'].
            image_shape (tuple, optional): Shape of the input images. Defaults to (224, 224, 3).
            batch_size (int, optional): Number of samples per batch. Defaults to 16.
        """
        super().__init__()
        self.dataset = dataset
        self.batch_size = batch_size
        self.image_shape = image_shape
        self.filename_column = filename_column
        self.target_column = target_column
        self.augmentation = augmentation
        self.preprocessing_fn = preprocessing_fn

    def __len__(self):
        """
        Computes the number of batches per epoch.

        Returns:
            int: Number of batches per epoch.
        """
        return len(self.dataset) // self.batch_size

    def on_epoch_end(self):
        """
        Shuffles the dataset at the end of each epoch to improve training variability.
        """
        self.dataset = self.dataset.sample(frac=1).reset_index(drop=True)

    def __getitem__(self, idx):
        """
        Generates a batch of data.

        Args:
            idx (int): Index of the batch.

        Returns:
            tuple: ((images, images), labels), where images are preprocessed input data,
                   and labels are the corresponding target values.
        """
        images = np.empty((self.batch_size, *self.image_shape), dtype=np.float32)
        labels = np.empty((self.batch_size, NUM_CLASSES), dtype=np.float32)

        for i in range(self.batch_size):
            img_index = idx * self.batch_size + i
            image_path = self.dataset.iloc[img_index][self.filename_column]
            image = img_to_array(load_img(image_path, target_size=self.image_shape[:2]))

            # Apply optional augmentation
            if self.augmentation:
                image = rotate(image, np.random.uniform(-30, 30), preserve_range=True)
                if np.random.choice([True, False]):  # Random horizontal flip
                    image = np.flip(image, axis=1)

            # Apply optional preprocessing function
            if self.preprocessing_fn:
                image = self.preprocessing_fn(image)

            images[i] = image
            labels[i] = self.dataset.iloc[idx*self.batch_size+i][class_names].values.astype(np.float32)

        return images, labels

In [79]:
train_datagen = DataGeneratorKeras(dataset = train_df, augmentation = True, preprocessing_fn = preprocess_input, batch_size = BATCH_SIZE)
valid_datagen = DataGeneratorKeras(dataset = val_df, augmentation = False, preprocessing_fn = preprocess_input, batch_size = BATCH_SIZE)

In [80]:
# Define the input
inputs = Input(shape=MODELS_INPUT_SHAPE)

#base_model = VGG19(weights='imagenet', input_tensor=inputs)
#base_model = InceptionV3(weights='imagenet', input_tensor=inputs)
#base_model = ResNet50(weights='imagenet', input_tensor=inputs)
#base_model = EfficientNetV2S(weights='imagenet', input_tensor=inputs)
base_model = DenseNet121(input_tensor=inputs)

base_model.trainable = False

# Add global average pooling layer
x = Flatten(name='flatten2')(base_model.output)
#x = GlobalAveragePooling2D()(base_model.output)
for units in [256, 128, 64, 32, 16]:
    x = Dense(units=units, activation='relu')(x)
outputs = Dense(NUM_CLASSES, activation='sigmoid')(x)

# Create the model
model = Model(inputs, outputs)

# Compile the model
model.compile(optimizer = Adam(LEARNING_RATE), loss = LOSS_FUNCTION, metrics = ['accuracy', Precision(), Recall()])

In [None]:
history = model.fit(train_datagen, epochs = EPOCHS, verbose = 1, validation_data = valid_datagen)

Epoch 1/20
[1m 12/320[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m49:10[0m 10s/step - accuracy: 0.3900 - loss: 0.6919 - precision_2: 0.4650 - recall_2: 0.6031