# Creating K-fold Dataset

In [None]:
import sklearn
sklearn.__version__

'1.2.2'

In [None]:
import os
import random
import shutil
from sklearn.model_selection import KFold, train_test_split

# Root of the raw dataset; create_splits() treats the entries in it as classes.
input_dataset_dir = './drive/MyDrive/Datasets/original_dataset/'
# Root under which create_splits() writes fold_<n>/<split>/<class>/ folders.
output_dataset_dir = './drive/MyDrive/Datasets/kfold_dataset/'


def _copy_images(image_names, source_dir, target_dir):
    """Copy each file named in ``image_names`` from ``source_dir`` to ``target_dir``."""
    for image_name in image_names:
        shutil.copy(os.path.join(source_dir, image_name),
                    os.path.join(target_dir, image_name))


def create_splits(k_folds=5, val_fraction=0.25):
    """Materialise a k-fold train/validation/test directory tree on disk.

    For every class directory found in ``input_dataset_dir`` the images are
    partitioned with ``KFold``. In fold ``n`` the held-out partition becomes
    the test set, and the remaining images are divided into training and
    validation sets. Files are copied to
    ``output_dataset_dir/fold_<n>/{train,validation,test}/<class>/``.

    Splitting is done class by class, so every split keeps the class
    proportions of the original dataset. Directory listings are sorted and
    all random states are fixed, so repeated runs produce the same split and
    never mix different splits into an existing output tree.

    Args:
        k_folds: Number of folds. Each fold's test set holds roughly
            ``1 / k_folds`` of every class.
        val_fraction: Share of the non-test images used for validation.
            With the defaults (5 folds, 0.25) the result is a 60/20/20
            train/validation/test split.

    Raises:
        ValueError: Propagated from scikit-learn when a class has too few
            images to be split as requested.
    """
    # Only sub-directories are classes; stray files in the dataset root
    # (e.g. OS metadata files) would otherwise crash os.listdir() below.
    class_names = sorted(
        entry for entry in os.listdir(input_dataset_dir)
        if os.path.isdir(os.path.join(input_dataset_dir, entry))
    )

    os.makedirs(output_dataset_dir, exist_ok=True)

    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    for class_name in class_names:
        class_path = os.path.join(input_dataset_dir, class_name)

        # os.listdir() order is arbitrary; sorting makes the seeded KFold
        # shuffle reproducible across runs and machines.
        all_images = sorted(os.listdir(class_path))

        # The fold's held-out indices are what make the folds differ from
        # each other; they must not be discarded.
        for fold, (rest_idx, test_idx) in enumerate(kf.split(all_images), start=1):
            fold_root = os.path.join(output_dataset_dir, f'fold_{fold}')
            train_fold_dir = os.path.join(fold_root, 'train', class_name)
            val_fold_dir = os.path.join(fold_root, 'validation', class_name)
            test_fold_dir = os.path.join(fold_root, 'test', class_name)

            os.makedirs(train_fold_dir, exist_ok=True)
            os.makedirs(val_fold_dir, exist_ok=True)
            os.makedirs(test_fold_dir, exist_ok=True)

            test_images = [all_images[i] for i in test_idx]
            rest_images = [all_images[i] for i in rest_idx]

            # Carve the validation set out of the non-test images only, so
            # the three splits of a fold are disjoint.
            train_images, val_images = train_test_split(
                rest_images, test_size=val_fraction, random_state=42)

            _copy_images(train_images, class_path, train_fold_dir)
            _copy_images(val_images, class_path, val_fold_dir)
            _copy_images(test_images, class_path, test_fold_dir)


# Build the fold directories on disk (copies image files; default is 5 folds).
create_splits()


# Creating Model Structure

In [None]:
# Import necessary libraries
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, BatchNormalization, Activation, GlobalAveragePooling2D
from keras.optimizers import Adam


class CNNModel:
    """Small wrapper around a compiled Keras ``Sequential`` CNN classifier.

    The network is six 3x3 Conv -> BatchNorm -> ReLU blocks with 32, 64, 96,
    128, 256 and 384 filters, interleaved with four 3x3/stride-2 max-pooling
    layers, followed by global average pooling, a 64-unit hidden dense layer
    and a softmax classification layer.
    """

    def __init__(self, input_shape, num_classes):
        """Build and compile the network.

        Args:
            input_shape: Shape of one input image, e.g. ``(112, 112, 3)``.
            num_classes: Number of output classes.
        """
        # The compiled Keras model that all other methods delegate to.
        self.model = CNNModel.build_model(input_shape, num_classes)

    @staticmethod
    def _add_conv_block(model, filters, **conv_kwargs):
        """Append a 3x3 Conv -> BatchNorm -> ReLU block to ``model``."""
        # No conv bias: the BatchNormalization that follows has its own offset.
        model.add(Conv2D(filters, (3, 3), padding='same', strides=(1, 1),
                         use_bias=False, **conv_kwargs))
        model.add(BatchNormalization(scale=False))
        model.add(Activation('relu'))

    @staticmethod
    def build_model(input_shape, num_classes):
        """Create, print the summary of, and compile the CNN.

        Args:
            input_shape: Shape of one input image (height, width, channels).
            num_classes: Size of the final softmax layer.

        Returns:
            The compiled ``Sequential`` model (Adam optimizer, categorical
            cross-entropy loss, accuracy metric).
        """
        model = Sequential()

        # Only the first layer declares the input shape; later layers infer
        # theirs, so the model is not tied to one image resolution.
        CNNModel._add_conv_block(model, 32, input_shape=input_shape)
        model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

        CNNModel._add_conv_block(model, 64)
        CNNModel._add_conv_block(model, 96)
        model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

        CNNModel._add_conv_block(model, 128)
        CNNModel._add_conv_block(model, 256)
        model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

        CNNModel._add_conv_block(model, 384)
        model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

        model.add(GlobalAveragePooling2D())

        # Hidden layer uses ReLU. Softmax belongs on the output layer only:
        # on a hidden layer it forces the 64 features to sum to 1 and
        # starves the classifier of signal.
        # NOTE: weights-only checkpoints trained with the former softmax
        # hidden layer still load (same shapes) but will not reproduce the
        # old predictions.
        model.add(Dense(64, activation='relu'))
        model.add(Dense(num_classes, activation='softmax'))

        model.summary()

        # Compile the model
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        return model

    def summary(self):
        """Print the layer-by-layer summary of the wrapped model."""
        self.model.summary()

    def train(self, train_dataset, validation_dataset, epochs, batch_size, cp_callback):
        """Fit the model and return the Keras ``History`` object.

        Args:
            train_dataset: Training data passed to ``Model.fit``.
            validation_dataset: Data passed as ``validation_data``.
            epochs: Number of epochs to train.
            batch_size: Forwarded to ``Model.fit``.
                NOTE(review): the Keras docs say not to specify
                ``batch_size`` when the input is an already-batched dataset;
                confirm the installed version tolerates it.
            cp_callback: Callback (e.g. a checkpoint callback) run during
                training.
        """
        history = self.model.fit(train_dataset,
                                 epochs=epochs,
                                 batch_size=batch_size,
                                 validation_data=validation_dataset,
                                 callbacks=[cp_callback])
        return history

    def evaluate(self, test_dataset):
        """Return the result of ``Model.evaluate`` on ``test_dataset``."""
        return self.model.evaluate(test_dataset)

    def predict(self, test_dataset):
        """Return the model's predictions for ``test_dataset``."""
        return self.model.predict(test_dataset)

    def save(self, path):
        """Save the whole model to ``path`` via ``Model.save``."""
        return self.model.save(path)


# Instantiate the network once; building it also prints the layer summary.
num_classes = 5
image_shape = (112, 112, 3)
model = CNNModel(input_shape=image_shape, num_classes=num_classes)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 112, 112, 32)      864       
                                                                 
 batch_normalization (Batch  (None, 112, 112, 32)      96        
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 112, 112, 32)      0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 55, 55, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 55, 55, 64)        18432     
                                                                 
 batch_normalization_1 (Bat  (None, 55, 55, 64)        1

# Training loop

In [None]:
#from model import CNNModel
import keras.utils
import matplotlib.pyplot as plt
import datetime, os

# Parameters
k_folds = 5
batch_size = 32
image_size = (112, 112)
image_shape = (112, 112, 3)
num_classes = 5
epochs = 40

# Test accuracy of every fold, averaged after the loop.
fold_accuracies = []

# Train, plot and evaluate one freshly initialised model per fold.
for fold in range(k_folds):
    print(f'Fold {fold+1} begins...')
    model = CNNModel(input_shape=image_shape, num_classes=num_classes)
    # Getting paths for each fold
    train_dir = f'./drive/MyDrive/Datasets/kfold_dataset/fold_{fold+1}/train'
    val_dir = f'./drive/MyDrive/Datasets/kfold_dataset/fold_{fold+1}/validation'
    test_dir = f'./drive/MyDrive/Datasets/kfold_dataset/fold_{fold+1}/test'

    # Loading train, validation and test datasets for each fold
    train_dataset = keras.utils.image_dataset_from_directory(
        directory=train_dir,
        labels='inferred',
        label_mode='categorical',
        batch_size=batch_size,
        image_size=image_size
    )

    val_dataset = keras.utils.image_dataset_from_directory(
        directory=val_dir,
        labels='inferred',
        label_mode='categorical',
        batch_size=batch_size,
        image_size=image_size
    )

    test_dataset = keras.utils.image_dataset_from_directory(
        directory=test_dir,
        labels='inferred',
        label_mode='categorical',
        batch_size=batch_size,
        image_size=image_size
    )
    #----------------------------------------------------------

    # Timestamp keeps artefacts of different runs from overwriting each other.
    unique = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

    # Output locations for this fold
    checkpoint_path = f'./drive/MyDrive/Datasets/models/saved_weights/cnn_model_epoch_{epochs}_fold_{fold+1}_{unique}.ckpt'
    accuracy_plot_path = f'./drive/MyDrive/Datasets/results/accuracy_epoch_{epochs}_fold_{fold+1}_{unique}.png'
    loss_plot_path = f'./drive/MyDrive/Datasets/results/loss_epoch_{epochs}_fold_{fold+1}_{unique}.png'
    saved_model_path = f'./drive/MyDrive/Datasets/models/saved_models/cnn_model_epoch_{epochs}_fold_{fold+1}_{unique}.keras'

    # Create every output directory up front so a missing folder cannot
    # fail the run after the model has already been trained.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    for out_dir in (checkpoint_dir,
                    os.path.dirname(accuracy_plot_path),
                    os.path.dirname(saved_model_path)):
        os.makedirs(out_dir, exist_ok=True)

    # Saving checkpoints
    cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

    # Training
    history = model.train(train_dataset, val_dataset, epochs, batch_size, cp_callback)

    # Plotting accuracy (own figure, so curves of other plots never overlap)
    plt.figure()
    plt.plot(history.history['accuracy'], label='accuracy')
    plt.plot(history.history['val_accuracy'], label='validation accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.title(f'Accuracy graph for fold {fold+1}')
    # Save before show(): show() may discard the figure contents.
    plt.savefig(accuracy_plot_path)
    plt.show()
    plt.close()

    # Plotting loss
    plt.figure()
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='validation loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='lower right')
    plt.title(f'Loss graph for fold {fold+1}')
    plt.savefig(loss_plot_path)
    plt.show()
    plt.close()

    # Evaluation on the fold's test split
    loss, accuracy = model.evaluate(test_dataset)
    print(f'Test Loss: {loss}')
    print(f'Test Accuracy: {accuracy}')
    fold_accuracies.append(accuracy)

    # Saving entire model
    model.save(saved_model_path)

    print(f'Fold {fold+1} ends...')

# Cross-validation summary: the mean over folds is the headline metric.
if fold_accuracies:
    mean_accuracy = sum(fold_accuracies) / len(fold_accuracies)
    print(f'Mean Test Accuracy over {len(fold_accuracies)} folds: {mean_accuracy}')

Fold 1 begins...
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 112, 112, 32)      864       
                                                                 
 batch_normalization (Batch  (None, 112, 112, 32)      96        
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 112, 112, 32)      0         
                                                                 
 max_pooling2d (MaxPooling2  (None, 55, 55, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 55, 55, 64)        18432     
                                                                 
 batch_normalization_1 (Bat  (None, 55,

In [None]:
import keras
from sklearn.metrics import confusion_matrix, classification_report
import os
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score

k_folds = 5
# NOTE(review): the training cell saves models under
# './drive/MyDrive/Datasets/models/saved_models/' with a 14-digit
# '%Y%m%d%H%M%S' suffix; confirm that p1 and uid match the files on disk.
p1 = './models/saved_models/'
uid = '20231126'
batch_size = 32
# image_dataset_from_directory expects (height, width) only; the channel
# count comes from its color_mode argument.
image_size = (112, 112)

# Re-load each fold's saved model and report its metrics on the test split.
for fold in range(k_folds):
    test_dir = f'./kfold_dataset/fold_{fold + 1}/test'

    # shuffle=False is essential: the dataset is iterated twice below (once
    # for the labels, once inside predict()). With the default shuffling the
    # two passes see different orders and labels no longer match predictions.
    test_dataset = keras.utils.image_dataset_from_directory(
        directory=test_dir,
        labels='inferred',
        label_mode='categorical',
        batch_size=batch_size,
        image_size=image_size,
        shuffle=False
    )

    # One-hot labels -> class indices, in dataset order.
    true_labels = []
    for features, labels in test_dataset:
        true_labels.extend(tf.argmax(labels, axis=1).numpy())

    true_labels = np.array(true_labels)

    # Saved models are numbered from 1 (fold + 1), like the fold directories.
    model = keras.models.load_model(os.path.join(p1, f'cnn_model_epoch_40_fold_{fold + 1}_{uid}.keras'))

    print(f'Printing results for fold{fold + 1}:')
    print('----------------------------------')

    y_pred = model.predict(test_dataset)
    y_pred_classes = np.argmax(y_pred, axis=1)
    accuracy_sk = accuracy_score(true_labels, y_pred_classes)

    _, accuracy = model.evaluate(test_dataset)

    # The two accuracies should agree; a mismatch indicates misaligned labels.
    print(f'Accuracy from sklearn accuracy score: {accuracy_sk}')
    print(f'Accuracy from model.evaluate(): {accuracy}')

    correct_labels = np.where(y_pred_classes == true_labels)[0]
    wrong_labels = np.where(y_pred_classes != true_labels)[0]

    print(f'Correct Labels: {len(correct_labels)}')
    print(f'Wrong Labels: {len(wrong_labels)}')

    cm = confusion_matrix(true_labels, y_pred_classes)
    print('Confusion Matrix:\n', cm)

    class_report = classification_report(true_labels, y_pred_classes)
    print('Classification Report:\n', class_report)

    print(f'Fold {fold+1} evaluation ends.')
    print('------------------------------------')