# CNN Model

#### Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# Do not use the GPU: Apple M1/M2 (Metal) handles Dropout layers poorly.
# Comment out the next two statements if you want to train on the GPU.
physical_devices = tf.config.list_physical_devices('GPU')
# An empty device list hides every GPU, not just the first one.
tf.config.set_visible_devices([], 'GPU')
from tensorflow.keras.callbacks import EarlyStopping
from keras import backend as K 
import gc
from tensorflow.keras.optimizers.legacy import Adam
from sklearn.model_selection import KFold,StratifiedKFold

from tensorflow.keras import regularizers
import tensorflow_model_optimization as tfmot
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import json

from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from sklearn.model_selection import train_test_split
import cv2
from tensorflow.keras.applications.resnet50 import ResNet50

#### GPU deaktivieren

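**Hinweis:** Diese Zelle deaktiviert die GPU. Das ist für M1/M2-Chips (Metal) sinnvoll, da diese Dropout-Layer auf der GPU nicht gut verarbeiten können. Wer auf der GPU trainieren möchte, kommentiert die Zelle aus.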

In [None]:
# Keep the list of detected GPUs around for inspection.
physical_devices = tf.config.list_physical_devices('GPU')
# An empty device list hides every GPU from TensorFlow, not just the first
# one, so training runs on the CPU regardless of how many GPUs exist.
tf.config.set_visible_devices([], 'GPU')

#### Daten einlesen

**Hinweis:** Die Daten wurden in einer pkl-Datei gespeichert, um nicht jedes Mal die Vorverarbeitung durchführen zu müssen und unkompliziert zwischen den Modellklassen hin- und herwechseln zu können.

In [None]:
# Load the preprocessed image table (columns used here: 'data' and 'Species').
data = pd.read_pickle('./data/images_df_numerical.pkl')

# Distinct species labels and how many of them there are.
classes = data["Species"].unique()
number_of_classes = len(classes)

# RAM was a major bottleneck, so memory usage is reduced in several places:
# both the image data and the labels are stored as uint8.
X = np.stack(data['data']).astype(np.uint8)
y = data['Species'].to_numpy().astype(np.uint8)

#### Normalisierung der Daten

Die Daten werden in das Intervall [0, 1] gebracht; dadurch wird die Konvergenzgeschwindigkeit verbessert.

In [None]:
# Scale pixel values from [0, 255] to [0, 1].
# Casting to float32 first avoids the implicit promotion to float64, which
# would need twice as much RAM; Keras computes in float32 by default anyway.
X = X.astype(np.float32) / 255.

#### Seed setzen

Um die Nachvollziehbarkeit zu erhöhen, setzen wir den Seed immer auf denselben Startwert.

In [None]:
# Fix the random seed so that repeated runs start from the same state.
tf.keras.utils.set_random_seed(seed=1)

Bei Verwendung einer GPU macht die folgende Option die Operationen so deterministisch wie möglich.

**Hinweis:** Diese Option vermindert die Performance

In [None]:
# Make TensorFlow ops run deterministically (this can reduce performance).
tf.config.experimental.enable_op_determinism()

#### Bilder reshapen

In [None]:
# Element counts of the first image and of the whole array.
# NOTE(review): X.size is the total number of array elements, not the number
# of images; neither value is referenced elsewhere in the visible notebook.
image_size, samples = X[0].size, X.size

# Image shape after resizing, as written by the data_prep notebook.
with open("./data/meta.json", "r") as meta_file:
    image_meta = json.load(meta_file)
image_shape = tuple(image_meta[axis] for axis in ('h', 'w', 'c'))

In [None]:
# Give every sample the (height, width, channels) layout; the number of
# samples is inferred from the remaining elements.
X = X.reshape(-1, *image_shape)
print(f"Image has shape: {image_shape}")

#### k-Fold-Cross-Validation

Wir benutzen 10-Fold-Cross-Validation, um das Ergebnis weniger vom gewählten Split abhängig zu machen und es somit zu stabilisieren. „Stratified“ stellt sicher, dass die Klassenverteilung in jedem Fold beibehalten wird. Dies ist vor allem ohne Resampling interessant, da wir extrem ungleich verteilte Klassen haben.

In [None]:
# Stratified 10-fold splitter. The fixed random_state makes every call to
# kfold.split() return the same folds, so the splits neither depend on the
# global RNG state (cell execution order) nor differ between the models
# that are compared on them.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)

#### Modell

In [None]:
# Training hyperparameters read by create_model() and fit_model().
epochs, batch_size = 200, 32
dropout_rate, weight_decay_alpha = 0.2, 0.01

# Stop training when val_accuracy has not improved by at least 0.001 for 30
# epochs (monitoring starts at epoch 15) and restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=30,
    start_from_epoch=15,
    restore_best_weights=True,
)

def create_model():
    """Build the uncompiled CNN classifier.

    Layer order: an input of shape ``image_shape``; three convolution stages
    with 32, 64 and 128 filters (3x3 kernels, ``same`` padding, ReLU, L2
    kernel regularisation; only the first stage uses stride 2), each followed
    by batch normalisation, dropout, 2x2 max pooling and another batch
    normalisation. The first two stages add a second dropout after the
    pooling, the third one does not. The result is flattened and passed
    through two dense blocks (256 and 128 units, ReLU, L2, batch
    normalisation, dropout) and a softmax layer with ``number_of_classes``
    units.

    Reads the module-level ``image_shape``, ``dropout_rate``,
    ``weight_decay_alpha`` and ``number_of_classes``.

    Returns:
        The assembled ``tf.keras.models.Sequential`` model (not compiled).
    """
    layers = tf.keras.layers

    def l2_penalty():
        # Fresh L2 regulariser for each weighted layer.
        return regularizers.l2(weight_decay_alpha)

    model = tf.keras.models.Sequential()
    model.add(layers.Input(shape=image_shape, name="aaa"))

    # (filters, strides, dropout after the post-pooling batch norm?)
    conv_stages = ((32, 2, True), (64, 1, True), (128, 1, False))
    for filters, strides, dropout_after_pool in conv_stages:
        model.add(layers.Conv2D(filters, 3, strides=strides, padding='same',
                                activation='relu', kernel_regularizer=l2_penalty()))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropout_rate))
        model.add(layers.MaxPooling2D(pool_size=(2, 2)))
        model.add(layers.BatchNormalization())
        if dropout_after_pool:
            model.add(layers.Dropout(dropout_rate))

    model.add(layers.Flatten())

    # Fully connected head.
    for units in (256, 128):
        model.add(layers.Dense(units, activation='relu', kernel_regularizer=l2_penalty()))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(number_of_classes, activation='softmax'))
    return model

In [None]:
def fit_model(model, X_train, y_train, X_val=None, y_val=None):
    """Train ``model`` and return the Keras ``History`` object.

    Training uses the module-level ``epochs`` and ``batch_size`` together
    with the ``early_stopping`` callback and a pruning-step update callback.

    Args:
        model: Compiled Keras model to train.
        X_train: Training inputs.
        y_train: Training labels.
        X_val: Optional validation inputs.
        y_val: Optional validation labels.

    Returns:
        The object returned by ``model.fit``.

    If both ``X_val`` and ``y_val`` are given they are used as validation
    data; otherwise ``validation_split=0.2`` is passed to ``model.fit``.
    """
    if X_val is not None and y_val is not None:
        validation_kwargs = {"validation_data": (X_val, y_val)}
    else:
        validation_kwargs = {"validation_split": 0.2}

    return model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping, tfmot.sparsity.keras.UpdatePruningStep()],
        verbose=1,
        **validation_kwargs,
    )

In [None]:
# Last step of the pruning schedule: batches per epoch (computed from the
# full data set size) times the maximum number of epochs.
steps_per_epoch = np.ceil(X.shape[0] / batch_size).astype(np.int32)
end_step = steps_per_epoch * epochs

# Sparsity grows polynomially from 50% zero weights at step 0 to 80% at
# end_step.
sparsity_schedule = tfmot.sparsity.keras.PolynomialDecay(
    initial_sparsity=0.50,
    final_sparsity=0.80,
    begin_step=0,
    end_step=end_step,
)
pruning_params = {'pruning_schedule': sparsity_schedule}

# Wrap a freshly built CNN for magnitude-based pruning.
model = tfmot.sparsity.keras.prune_low_magnitude(create_model(), **pruning_params)

In [None]:
def train_val_test_split(train_indezes, test_indezes):
    """Split one cross-validation fold into train, validation and test sets.

    The samples selected by ``train_indezes`` are divided 80/20 into a
    training and a validation part (stratified by label, ``random_state=42``);
    the samples selected by ``test_indezes`` form the test part. Data is
    taken from the module-level arrays ``X`` and ``y``.

    Args:
        train_indezes: Indices of the fold's training portion.
        test_indezes: Indices of the fold's held-out portion.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)`` of
        TensorFlow tensors.
    """
    fold_X, fold_y = X[train_indezes], y[train_indezes]
    X_train, X_val, y_train, y_val = train_test_split(
        fold_X, fold_y, test_size=0.2, stratify=fold_y, random_state=42)

    # Stored as tensors; according to the original authors this saves RAM.
    parts = (X_train, y_train, X_val, y_val, X[test_indezes], y[test_indezes])
    return tuple(tf.convert_to_tensor(part) for part in parts)

In [None]:
# Per-fold results, collected for the confusion matrix and the curves.
true_labels = []
pred_labels = []
train_accuracies = []
test_accuracies = []
val_accuracies = []
train_losses = []
val_losses = []
test_losses = []

for train_indezes, test_indezes in kfold.split(X, y):
    # Drop the previous fold's model from the Keras session to save RAM.
    K.clear_session()

    # Split the data for this fold.
    X_train, y_train, X_val, y_val, X_test, y_test = train_val_test_split(train_indezes, test_indezes)

    # Optional resampling of the training and validation sets only:
    #X_train, y_train = resample_after_split(X_train, y_train)
    #X_val, y_val = resample_after_split(X_val, y_val)

    # Build a fresh pruning-wrapped model for every fold. Reusing a single
    # model instance would carry trained weights (and the pruning step
    # counter) into the next fold, whose test samples were part of earlier
    # training sets, which inflates the reported test accuracy.
    model = tfmot.sparsity.keras.prune_low_magnitude(create_model(), **pruning_params)
    model.compile(optimizer=Adam(0.001), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    history = fit_model(model, X_train, y_train, X_val, y_val)

    # For the confusion matrix: store plain NumPy labels instead of tensors.
    predictions = np.argmax(model.predict(X_test), axis=-1)
    true_labels.extend(y_test.numpy())
    pred_labels.extend(predictions)

    # For the accuracy curves (per-epoch values, concatenated over folds).
    train_accuracies.extend(history.history['accuracy'])
    val_accuracies.extend(history.history['val_accuracy'])

    # For the loss curves (per-epoch values, concatenated over folds).
    train_losses.extend(history.history['loss'])
    val_losses.extend(history.history['val_loss'])

    # Held-out performance of this fold.
    loss, accuracy = model.evaluate(X_test, y_test)
    test_accuracies.append(accuracy)
    test_losses.append(loss)

#### VGG19 und ResNet50 als Vergleich

**Hinweis:** Um die Lesbarkeit zu verbessern, stehen die beiden Modelle in eigenen Zellen. Natürlich wäre das Ergebnis noch ein klein bisschen aussagekräftiger, weniger rechenintensiv und RAM-sparender, wenn wir die Splits unseres eigenen Modells wiederverwenden würden.

##### VGG19

In [None]:
early_stopping = EarlyStopping(monitor='val_accuracy', patience=30, min_delta=0.001, start_from_epoch=15, restore_best_weights=True)
epochs = 200
batch_size = 32

vgg19_test_accuracies = list()


def build_vgg19_model():
    """Return a compiled classifier on top of a frozen, ImageNet-pretrained VGG19.

    The head mirrors the dense part of the custom CNN (256 and 128 units with
    L2 regularisation, batch normalisation and dropout) and ends in a softmax
    layer with ``number_of_classes`` units.
    """
    # NOTE(review): the inputs are scaled to [0, 1] rather than passed
    # through the VGG19 preprocess_input function - confirm this is intended.
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze the convolutional part.
    for layer in base_model.layers:
        layer.trainable = False

    model = Sequential()
    model.add(base_model)
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(weight_decay_alpha)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(weight_decay_alpha)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    # Size the output layer from the data instead of hard-coding 6 classes.
    model.add(Dense(number_of_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


for train_indezes, test_indezes in kfold.split(X, y):
    K.clear_session()
    # Rebuild the model for every fold so that no trained head weights leak
    # from one fold into the next (the next fold's test data was part of
    # earlier training sets).
    model = build_vgg19_model()
    X_train, y_train = tf.convert_to_tensor(X[train_indezes]), tf.convert_to_tensor(y[train_indezes])
    X_test, y_test = tf.convert_to_tensor(X[test_indezes]), tf.convert_to_tensor(y[test_indezes])
    history = model.fit(X_train, y_train, batch_size=batch_size, callbacks=[early_stopping], epochs=epochs, validation_split=0.2, verbose=1)
    _, accuracy = model.evaluate(X_test, y_test)
    vgg19_test_accuracies.append(accuracy)

##### ResNet50

In [None]:
early_stopping = EarlyStopping(monitor='val_accuracy', patience=30, min_delta=0.001, start_from_epoch=15, restore_best_weights=True)
epochs = 200
batch_size = 32

resnet50_test_accuracies = list()


def build_resnet50_model():
    """Return a compiled classifier on top of a frozen, ImageNet-pretrained ResNet50.

    The head mirrors the dense part of the custom CNN (256 and 128 units with
    L2 regularisation, batch normalisation and dropout) and ends in a softmax
    layer with ``number_of_classes`` units.
    """
    # NOTE(review): the inputs are scaled to [0, 1] rather than passed
    # through the ResNet50 preprocess_input function - confirm this is intended.
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze the convolutional part.
    for layer in base_model.layers:
        layer.trainable = False

    model = Sequential()
    model.add(base_model)
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(weight_decay_alpha)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(weight_decay_alpha)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    # Size the output layer from the data instead of hard-coding 6 classes.
    model.add(Dense(number_of_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


for train_indezes, test_indezes in kfold.split(X, y):
    K.clear_session()
    # Rebuild the model for every fold so that no trained head weights leak
    # from one fold into the next (the next fold's test data was part of
    # earlier training sets).
    model = build_resnet50_model()
    X_train, y_train = tf.convert_to_tensor(X[train_indezes]), tf.convert_to_tensor(y[train_indezes])
    X_test, y_test = tf.convert_to_tensor(X[test_indezes]), tf.convert_to_tensor(y[test_indezes])
    history = model.fit(X_train, y_train, batch_size=batch_size, callbacks=[early_stopping], epochs=epochs, validation_split=0.2, verbose=1)
    _, accuracy = model.evaluate(X_test, y_test)
    resnet50_test_accuracies.append(accuracy)

#### Confusion Matrix

Man kann klar erkennen: Je weniger Datenpunkte vorhanden sind, desto schlechter ist die Klassifikation. Leider konnten wir das Problem, wie bereits erwähnt, ohne Domänenwissen und mit den eingeschränkten Mitteln nicht lösen.

In [None]:
# Store the result under its own name: assigning it to `confusion_matrix`
# would shadow the imported scikit-learn function and make this cell fail
# when it is run a second time.
cm = confusion_matrix(true_labels, pred_labels)
fig, ax = plt.subplots(figsize=(8, 6))
# The matrix rows/columns follow the sorted label values, whereas `classes`
# is in order of first appearance, so sort it to keep the tick labels aligned.
cm_display = ConfusionMatrixDisplay(cm, display_labels=np.sort(classes))
cm_display.plot(ax=ax, cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

#### Trainings- und Validationkurve plotten

War für uns sehr hilfreich, um Overfitting zu erkennen.

In [None]:
# Plot the training and validation curves. The recorded values are per-epoch
# metrics concatenated over all folds.
# A dedicated name is used for the number of points: reassigning the global
# `epochs` here would change the training budget of any training cell that
# is re-run afterwards.
total_epochs = len(train_losses)
epoch_axis = range(1, total_epochs + 1)
plt.figure(figsize=(12, 4))

# Loss curves
plt.subplot(1, 2, 1)
plt.plot(epoch_axis, train_losses, label='Training Loss')
plt.plot(epoch_axis, val_losses, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Accuracy curves
plt.subplot(1, 2, 2)
plt.plot(epoch_axis, train_accuracies, label='Training Accuracy')
plt.plot(epoch_axis, val_accuracies, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.savefig("./cnn_files/loss_and_accuraccy.png")
plt.show()

#### Ergebnisse

|Durchführung|Avg. Test Acc|
|:-|-:|
|**VGG19**|
| Kein Resample, top 6 classes (> 60), vgg19| 82% |
| Kein Resample, top 4 classes (>200), vgg19| 83% |
| Kein Resample, top 2 classes (>500), vgg19| 91% |
|---------------|
|**ResNet50**|
| Kein Resample, top 6 classes (> 60), resNet50| 70% |
| Kein Resample, top 4 classes (>200), resNet50| 74% |
| Kein Resample, top 2 classes (>500), resNet50| 84% |
|---------------|
|**Unser Modell**|
| Kein Resample, top 6 classes (> 60), cnn  | 79% |
| Kein Resample, top 4 classes (>200), cnn  | 83% |
| Kein Resample, top 2 classes (>500), cnn  | 87% |

In [None]:
# Average test accuracy over the folds for the two reference models.
print(f"VGG19 Avg. test accuracy: {sum(vgg19_test_accuracies) / len(vgg19_test_accuracies)}")
# Report the ResNet50 results from their own list, not the VGG19 one.
print(f"ResNet50 Avg. test accuracy: {sum(resnet50_test_accuracies) / len(resnet50_test_accuracies)}")

# Summary for the custom CNN. The validation values are per-epoch metrics
# collected over all folds; the test values are one entry per fold.
print("Unser Modell")
print(f"Avg. Val Accuracy: {sum(val_accuracies) / len(val_accuracies)}")
print(f"Best Val Accuracy: {max(val_accuracies)}")
print(f"Avg. Test Accuracy: {sum(test_accuracies) / len(test_accuracies)}")
print(f"Best Test Accuracy: {max(test_accuracies)}")
print(f"Avg. Test Loss: {sum(test_losses) / len(test_losses)}")