In [None]:
##########
# from numba import cuda
# import torch
##########

import warnings
warnings.filterwarnings("ignore")

###########

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras import Input
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Dropout,BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import StratifiedKFold, train_test_split

import scipy.stats as stats

import seaborn as sns
from sklearn import model_selection
from sklearn.utils import resample
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix, roc_curve, roc_auc_score, classification_report
from sklearn.ensemble import RandomForestClassifier

from tqdm.auto import tqdm

import iterstrat
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from PIL import Image  # Add this line

import scipy.stats as stats
# from tensorflow.keras.preprocessing.image import load_img, img_to_array


In [None]:
# Definitions

def load_images_and_labels(folder_path, label_file, target_size):
    """Load grayscale images and their integer labels listed in a label file.

    Every non-blank line of ``label_file`` must have the form
    ``<image_name>,<label>``. Each image is read from ``folder_path``,
    resized to ``target_size``, converted to an array and divided by 255.

    Args:
        folder_path: Directory that contains the image files.
        label_file: Path of the comma-separated label file.
        target_size: Size tuple forwarded to ``image.load_img``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays, in file order.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            comma-separated fields, or the label is not an integer.
    """
    images = []
    labels = []
    with open(label_file, 'r') as f:
        # Iterate lazily instead of materialising the whole file first.
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            image_name, label = line.split(',')
            img = image.load_img(
                os.path.join(folder_path, image_name.strip()),
                target_size=target_size,
                color_mode='grayscale',
            )
            img_array = image.img_to_array(img)
            img_array /= 255.0  # Normalize pixel values
            images.append(img_array)
            labels.append(int(label))
    return np.array(images), np.array(labels)

###########
# Plot and save loss graph
def plot_loss(history, save_path=None):
    """Draw the training and validation loss curves of a Keras history.

    Args:
        history: Object whose ``history`` dict holds ``'loss'`` and
            ``'val_loss'`` sequences.
        save_path: Optional path prefix; when truthy the figure is saved
            to ``save_path + "_lossPlot.png"`` before it is shown.
    """
    curves = (
        ('loss', 'Train Loss'),
        ('val_loss', 'Test Loss'),
    )
    for history_key, legend_label in curves:
        plt.plot(history.history[history_key], label=legend_label)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    if save_path:
        target = save_path + "_lossPlot.png"
        plt.savefig(target)
    plt.show()

# Plot and save accuracy graph
def plot_acc(history, save_path=None):
    """Draw the training and validation accuracy curves of a Keras history.

    Args:
        history: Object whose ``history`` dict holds ``'accuracy'`` and
            ``'val_accuracy'`` sequences.
        save_path: Optional path prefix; when truthy the figure is saved
            to ``save_path + "_accPlot.png"`` before it is shown.
    """
    curves = (
        ('accuracy', 'Train Accuracy'),
        ('val_accuracy', 'Test Accuracy'),
    )
    for history_key, legend_label in curves:
        plt.plot(history.history[history_key], label=legend_label)

    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    if save_path:
        target = save_path + "_accPlot.png"
        plt.savefig(target)
    plt.show()
        
# Plot confusion matrix as heatmap
def plot_confMatrix(conf_matrix, save_path=None, class_names=None):
    """Render a confusion matrix as an annotated heatmap.

    Args:
        conf_matrix: Square matrix of integer counts (rows are true labels,
            columns are predicted labels, as labelled on the axes).
        save_path: Optional path prefix; when truthy the figure is saved
            to ``save_path + "_cnfMat.png"`` before it is shown.
        class_names: Optional sequence of tick labels, one per class.
            Defaults to ``['Class A', 'Class B', 'Class C']`` so existing
            callers keep the previous output.
    """
    if class_names is None:
        class_names = ['Class A', 'Class B', 'Class C']
    file_name = "_cnfMat.png"
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False, square=True,
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    if save_path:
        plt.savefig(save_path+file_name)
    plt.show()

def calculate_metrics(model_name, history, y_true, y_pred, y_test, x_test, descr=None):
    """Collect training-history values and classification metrics in a dict.

    Args:
        model_name: Label stored under the ``'Model'`` key.
        history: Object whose ``history`` dict holds ``'loss'``,
            ``'accuracy'``, ``'val_loss'`` and ``'val_accuracy'`` sequences;
            the last entry of each is reported.
        y_true: True class labels.
        y_pred: Predicted class labels.
        y_test: Unused; kept so existing call sites keep working.
        x_test: Unused; kept so existing call sites keep working.
        descr: Optional free-text description stored under ``'descr'``
            (defaults to ``None``, the value previously always returned).

    Returns:
        Dict with the keys ``'Model'``, ``'Loss_test'``, ``'Acc_test'``,
        ``'Loss_train'``, ``'Acc_train'``, ``'F1 Score'``, ``'Precision'``,
        ``'Recall'``, ``'accuracy_ci'`` (a ``(lower, upper)`` tuple) and
        ``'descr'``.
    """
    z_score = 1.96  # Z-score for 95% confidence interval
    n_samples = len(y_true)

    # Compute the accuracy once instead of once per use in the interval.
    accuracy = accuracy_score(y_true, y_pred)
    # Normal-approximation interval: accuracy +/- z * sqrt(p * (1 - p) / n).
    margin = z_score * np.sqrt(accuracy * (1 - accuracy) / n_samples)
    accuracy_ci = (accuracy - margin, accuracy + margin)

    return {
        'Model': model_name,
        'Loss_test': history.history['val_loss'][-1],
        'Acc_test': history.history['val_accuracy'][-1],
        'Loss_train': history.history['loss'][-1],
        'Acc_train': history.history['accuracy'][-1],
        'F1 Score': f1_score(y_true, y_pred, average='weighted'),
        'Precision': precision_score(y_true, y_pred, average='weighted'),
        'Recall': recall_score(y_true, y_pred, average='weighted'),
        'accuracy_ci': accuracy_ci,
        'descr': descr,
    }


# Save metrics table as image
def metrics_table_pic(metrics_df, save_path=None):
    """Render a metrics DataFrame as a table figure.

    Args:
        metrics_df: DataFrame whose values fill the cells and whose column
            names become the header row.
        save_path: Optional path prefix; when truthy the figure is saved
            to ``save_path + "_metrcs.png"`` before it is shown.
    """
    plt.figure(figsize=(8, 6))
    table_options = {
        'cellText': metrics_df.values,
        'colLabels': metrics_df.columns,
        'loc': 'center',
        'cellLoc': 'center',
    }
    plt.table(**table_options)
    # Hide the axes so that only the table is visible.
    plt.axis('off')
    plt.tight_layout()

    if save_path:
        target = save_path + "_metrcs.png"
        plt.savefig(target)
    plt.show()

# write metrics into a csv file
def write_metrics_to_csv(
    metrics_dict,
    csv_file='C:/Users/SIMIC/Downloads/Bilder_split/output/existing_metrics_table.csv',
):
    """Append one row of metrics to a CSV file, creating it when needed.

    Args:
        metrics_dict: Mapping of column name to value; it becomes a single
            row of the table.
        csv_file: Path of the CSV file. Defaults to the location that was
            previously hard-coded, so existing callers are unaffected.

    If the file is missing, zero bytes long, or contains no data rows, it is
    (re)written with just the new row; otherwise the new row is appended to
    the rows already stored.
    """
    # Convert the dictionary to a one-row DataFrame
    metrics_df = pd.DataFrame.from_dict(metrics_dict, orient='index').T

    try:
        existing_data = pd.read_csv(csv_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No file yet, or a file without any parsable content.
        existing_data = None

    if existing_data is not None and not existing_data.empty:
        metrics_df = pd.concat([existing_data, metrics_df], ignore_index=True)

    metrics_df.to_csv(csv_file, index=False)
###########

# Image and label paths: the label file is expected inside the image folder.
folder_path = r'C:/Users/SIMIC/Downloads/Bilder_split/ap2/'
label_file = os.path.join(folder_path, 'labelsPrio1.txt')


In [None]:
# main model
# Baseline CNN: trained on the images as loaded (no augmentation) and
# evaluated on the positional hold-out part of the same data set.

# set seeds
np.random.seed(42)
tf.random.set_seed(42)

# define variables and hyperparameters
descr = "32/32/64/64/64-dense/EarlyStop"
loss= "categorical_crossentropy"
optimizer= "adam"
epochs= 50
image_width = 224
image_height = 224
target_size = (image_height, image_width)
batch_size = 16
l2_reg=0.01

# prepare data set
images, labels = load_images_and_labels(folder_path, label_file, target_size)
# Labels are shifted down by one before one-hot encoding
# (presumably they are 1-based in the label file; TODO confirm).
labels = to_categorical(labels - 1) # onehot encoding

# Positional split: the first 80 % of the samples are used for training and
# the remaining 20 % are held out. Nothing is shuffled here.
# NOTE(review): if the label file is ordered by class, the held-out part may
# not contain every class; confirm the file order.
split_ratio = 0.8
split_idx = int(len(images) * split_ratio)
x_train, y_train = images[:split_idx], labels[:split_idx]
x_test, y_test = images[split_idx:], labels[split_idx:]

# Four convolution layers (32/32/64/64 filters) followed by a regularized
# dense layer, dropout and a three-way softmax output.
model = Sequential([
    Input(shape=(image_height, image_width, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    Conv2D(32, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu', kernel_regularizer=l2(l2_reg)),
    Dropout(0.75),
    Dense(3, activation='softmax')
])

# compile
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
# NOTE(review): patience (50) equals the epoch budget (50), so this callback
# presumably never stops training early; confirm that this is intended.
early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

# train
# The held-out part is used as validation data during training and again
# for the final metrics below.
history = model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, y_test), callbacks=[early_stopping])

# pred
# Convert softmax outputs and one-hot targets back to class indices.
y_pred = np.argmax(model.predict(x_test), axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate classification metrics
# Loss/accuracy values are those recorded for the last epoch in the history.
loss_test = history.history['val_loss'][-1]
acc_test = history.history['val_accuracy'][-1]
loss_train = history.history['loss'][-1]
acc_train = history.history['accuracy'][-1]
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')


# Accuracy CI
# confidence_level is not used in the computation; only z_score is.
confidence_level = 0.95
z_score = 1.96  # Z-score for 95% confidence interval
n_samples = len(y_true)

# Normal-approximation interval: accuracy +/- z * sqrt(p * (1 - p) / n)
accuracy_ci = (accuracy - z_score * np.sqrt(accuracy * (1 - accuracy) / n_samples),
               accuracy + z_score * np.sqrt(accuracy * (1 - accuracy) / n_samples))

# Print metrics
metrics_dict = {
    'Metric': ['Loss_test', 'Acc_test', 'Loss_train', 'Acc_train', 'F1 Score', 'Precision', 'Recall',
               'acc_ci', 'descr'],
    'Value': [loss_test, acc_test, loss_train, acc_train, f1, precision, recall,
              accuracy_ci, descr]
}

# Show the learning curves and the confusion matrix, then print the table.
conf_matrix = confusion_matrix(y_true, y_pred)
metrics_df = pd.DataFrame(metrics_dict)
plot_loss(history)
plot_acc(history)
plot_confMatrix(conf_matrix)
print(metrics_df)



In [None]:
# Main Model with Data augmentation
# Same CNN as the baseline, trained on randomly augmented batches.

# set seeds
np.random.seed(42)
tf.random.set_seed(42)

# define variables and hyperparameters
descr = "32/32/64/64/64-dense/EarlyStop/dataAug"
loss = "categorical_crossentropy"
optimizer = "adam"
epochs = 50
image_width = 224
image_height = 224
target_size = (image_height, image_width)
batch_size = 32
l2_reg = 0.01

# prepare data set
images, labels = load_images_and_labels(folder_path, label_file, target_size)
labels = to_categorical(labels - 1)  # one-hot encoding of the shifted labels

# Positional 80/20 split (no shuffling), identical to the baseline cell.
split_ratio = 0.8
split_idx = int(len(images) * split_ratio)
x_train, y_train = images[:split_idx], labels[:split_idx]
x_test, y_test = images[split_idx:], labels[split_idx:]

model2 = Sequential([
    Input(shape=(image_height, image_width, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    Conv2D(32, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu', kernel_regularizer=l2(l2_reg)),
    Dropout(0.75),
    Dense(3, activation='softmax')
])

model2.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# Image Augmentation (applied to the training batches only)
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.15,
    vertical_flip=True,
    horizontal_flip=False,
    fill_mode='constant'
)

# Set up EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model with augmented data.
# The callback has to be handed to fit(); creating it alone has no effect.
history = model2.fit(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    epochs=epochs,
    # Integer division, but never 0 steps when there are fewer samples
    # than one batch.
    steps_per_epoch=max(1, len(x_train) // batch_size),
    validation_data=(x_test, y_test),
    callbacks=[early_stopping],
)

# Convert softmax outputs and one-hot targets back to class indices.
y_pred = np.argmax(model2.predict(x_test), axis=1)
y_true = np.argmax(y_test, axis=1)

conf_matrix = confusion_matrix(y_true, y_pred)

# Calculate classification metrics
# Early stopping may restore weights from an earlier epoch, so the test
# loss/accuracy are measured on the final model instead of being read from
# the last history entry.
loss_test, acc_test = model2.evaluate(x_test, y_test, verbose=0)
# Training values are those recorded for the last epoch that was run.
loss_train = history.history['loss'][-1]
acc_train = history.history['accuracy'][-1]
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')

# Accuracy CI (normal approximation): accuracy +/- z * sqrt(p * (1 - p) / n)
z_score = 1.96  # Z-score for 95% confidence interval
n_samples = len(y_true)
ci_margin = z_score * np.sqrt(accuracy * (1 - accuracy) / n_samples)
accuracy_ci = (accuracy - ci_margin, accuracy + ci_margin)

# Print metrics
metrics_dict = {
    'Metric': ['Loss_test', 'Acc_test', 'Loss_train', 'Acc_train', 'F1 Score', 'Precision', 'Recall',
               'acc_ci', 'descr'],
    'Value': [loss_test, acc_test, loss_train, acc_train, f1, precision, recall,
              accuracy_ci, descr]
}

metrics_df = pd.DataFrame(metrics_dict)
plot_loss(history)
plot_acc(history)
plot_confMatrix(conf_matrix)
print(metrics_df)

In [None]:
### VGG16 and ResNet 50
# Setup for the transfer-learning comparison: seeds, imports, data
# directories and the training parameters shared by both models.

# set seeds
np.random.seed(42)
tf.random.set_seed(42)

from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import image

# Define paths to your train and validation data directories
# (they are read with flow_from_directory further down in this cell)
train_data_dir = 'C:/Users/SIMIC/Downloads/Bilder_split/vgg16/training'
val_data_dir = 'C:/Users/SIMIC/Downloads/Bilder_split/vgg16/validation'

# Define batch size and epochs
batch_size = 32
epochs = 50

# Custom data generator with preprocessing function
class CustomImageDataGenerator(ImageDataGenerator):
    """ImageDataGenerator with an optional grayscale-to-RGB conversion hook.

    NOTE(review): ``flow_from_directory`` returns a separate iterator object,
    so ``__getitem__`` below is presumably never invoked by the training
    loop; the generators in this file already request ``color_mode='rgb'``.
    Confirm whether this hook is still needed.
    """

    def __init__(self, grayscale_to_rgb=False, *args, **kwargs):
        """Store the conversion flag and forward the rest to the base class.

        Args:
            grayscale_to_rgb: When true, ``preprocess_img`` converts its
                input to three channels.
            *args: Positional arguments for ``ImageDataGenerator``.
            **kwargs: Keyword arguments for ``ImageDataGenerator``.
        """
        super().__init__(*args, **kwargs)
        self.grayscale_to_rgb = grayscale_to_rgb

    def preprocess_img(self, img):
        """Return ``img`` as a three-channel array when conversion is enabled.

        Args:
            img: Either an object with a ``convert`` method (e.g. a PIL
                image) or an array-like of shape ``(H, W)``, ``(H, W, 1)``
                or ``(H, W, C)``.

        Returns:
            The converted array, or ``img`` unchanged when the flag is off.
        """
        if not self.grayscale_to_rgb:
            return img
        if hasattr(img, "convert"):
            # PIL-style image: let the image library do the conversion.
            return np.asarray(image.img_to_array(img.convert("RGB")))
        # Array input (what __getitem__ passes in): arrays have no
        # ``convert`` method, so replicate the single channel instead.
        pixels = np.asarray(img)
        if pixels.ndim == 2:
            pixels = pixels[..., np.newaxis]
        if pixels.shape[-1] == 1:
            pixels = np.repeat(pixels, 3, axis=-1)
        return pixels

    def __getitem__(self, idx):
        """Fetch batch ``idx`` from the base class and convert its images."""
        batch_x, batch_y = super().__getitem__(idx)
        if self.grayscale_to_rgb:
            batch_x = np.array([self.preprocess_img(img) for img in batch_x])
        return batch_x, batch_y

# Create train and validation generators with custom preprocessing function.
# Only the training generator applies random augmentation.
train_generator = CustomImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    grayscale_to_rgb=True  # Apply grayscale to RGB conversion
).flow_from_directory(
    train_data_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    color_mode='rgb'  # Set to RGB
)

# Validation images are only rescaled: random rotations, shifts and flips
# would make the validation metrics noisy and change them between epochs.
val_generator = CustomImageDataGenerator(
    rescale=1./255,
    grayscale_to_rgb=True  # Apply grayscale to RGB conversion
).flow_from_directory(
    val_data_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    color_mode='rgb'  # Set to RGB
)

# Create and compile VGG16 model: ImageNet backbone without its top layers,
# followed by a small classification head with three outputs.
base_model_vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model_vgg16 = Sequential([
    base_model_vgg16,
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')  # three output classes
])
model_vgg16.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history_vgg16 = model_vgg16.fit(train_generator, epochs=epochs, validation_data=val_generator)

# Create and compile ResNet50 model with the same classification head.
base_model_resnet50 = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model_resnet50 = Sequential([
    base_model_resnet50,
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')  # three output classes
])
model_resnet50.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history_resnet50 = model_resnet50.fit(train_generator, epochs=epochs, validation_data=val_generator)


# Plot training accuracy of both models in one figure
plt.plot(history_vgg16.history['accuracy'], label='VGG16 Training Accuracy')
plt.plot(history_resnet50.history['accuracy'], label='ResNet50 Training Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Random forest
# Setup for the random-forest baseline: seeds, input locations and the
# containers that are filled while the label file is read further down.

# set seeds
np.random.seed(42)
tf.random.set_seed(42)

# Path to the folder containing images
image_folder = 'C:/Users/SIMIC/Downloads/Bilder_split/ap2/'

# Path to the text file containing labels
txt_file = 'C:/Users/SIMIC/Downloads/Bilder_split/ap2/labelsPrio1.txt'

# Lists to store image paths and labels
image_paths = []
labels = []

def extract_features(image_path):
    """Load one image and return it as a preprocessed batch of size one.

    The image is resized to 224x224, converted to an array, passed through
    the imported ``preprocess_input`` and given a leading batch axis.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The preprocessed pixel array with an extra first dimension.
    """
    pixels = img_to_array(load_img(image_path, target_size=(224, 224)))
    prepared = preprocess_input(pixels)
    return np.expand_dims(prepared, axis=0)

# Read the text file line by line; each non-blank line is "<image>,<label>"
with open(txt_file, "r") as file:
    for line in file:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no sample.
            continue
        parts = line.split(",")  # Split each line based on comma separator
        image_name = parts[0].strip()  # Extract image name
        label = int(parts[1].strip())  # Extract label
        image_path = os.path.join(image_folder, image_name)  # Construct full image path
        image_paths.append(image_path)
        labels.append(label)


image_paths = np.array(image_paths)
labels = np.array(labels)

# Random 80/20 split of the paths, then load the pixels of each part.
X_train_paths, X_test_paths, y_train, y_test = train_test_split(image_paths, labels, test_size=0.2, random_state=42)
X_train_features = np.array([extract_features(image_path) for image_path in X_train_paths])
X_test_features = np.array([extract_features(image_path) for image_path in X_test_paths])

# The forest works on flat vectors, so every image is flattened to one row.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_features.reshape(X_train_features.shape[0], -1), y_train)

y_pred = rf_classifier.predict(X_test_features.reshape(X_test_features.shape[0], -1))

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Bootstrap the test accuracy. The fitted forest predicts the same label
# for a given sample every time, so resampling the per-sample hit/miss
# vector gives the same accuracies as re-predicting each resample.
n_iterations = 1000
n_size = len(y_test)
is_correct = (y_pred == y_test)
acc_results = np.zeros(n_iterations)
for i in range(n_iterations):
    # Bootstrap resample (with replacement)
    indices = np.random.choice(len(y_test), size=n_size)
    acc_results[i] = is_correct[indices].mean()

# Confidence interval: 2.5th and 97.5th percentile of the bootstrap
# accuracies. A t-interval on the standard error of their mean would shrink
# as n_iterations grows and would not describe the accuracy's uncertainty.
ci_lower, ci_upper = np.percentile(acc_results, [2.5, 97.5])
acc_ci = (ci_lower, ci_upper)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("Accuracy Confidence Interval:", acc_ci)