In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import pandas as pd
from pathlib import Path
import os

import tensorflow as tf
import keras_tuner as kt


from tensorflow import keras
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from collections import Counter

from functions import *  # functions used in project

# Make tf.function-wrapped code execute eagerly instead of as traced graphs.
# NOTE(review): presumably required by the custom metric/helpers imported from
# `functions`; eager execution is typically slower - confirm it is still needed.
tf.config.run_functions_eagerly(True)

In [None]:
# Dataset names this notebook can be run against (one .npz archive per name).
tasks = [
    'tissuemnist',
    'retinamnist',
    'pneumoniamnist',
    'pathmnist',
    'organsmnist',
    'organcmnist',
    'organamnist',
    'octmnist',
    'dermamnist',
    'breastmnist',
    'bloodmnist',
]

# Dataset used for this run, picked by its position in `tasks`.
task = tasks[6]

print(task)

In [None]:
# Load the images for the selected task and resize them to 32x32 (28x28 originally).
# The data folder can be overridden with the MEDMNIST_DATA_DIR environment variable so
# the notebook is not tied to a single machine; the default keeps the original location.
DATA_DIR = Path(os.environ.get('MEDMNIST_DATA_DIR', '/Users/mateusz.maj/Downloads/data_2/'))
npz_path = DATA_DIR / (task + '.npz')

# load_npz_data comes from the project's `functions` module and returns the three
# (images, labels) splits in train / validation / test order.
(train_images, train_labels), (val_images, val_labels), (test_images, test_labels) = load_npz_data(npz_path, resize = True)

print('Size of training set:', train_images.shape[0])
print('Size of validation set:', val_images.shape[0])
print('Size of test set:', test_images.shape[0])

In [None]:
# Visualisation of examples: show three randomly chosen training images side by side.
fig, axes = plt.subplots(1, 3, figsize=(10, 4))

for ax in axes:
    # randrange excludes the upper bound. randint(0, len(train_images)) includes it,
    # so it could return len(train_images) and raise IndexError on the lookup below.
    n = random.randrange(len(train_images))
    ax.imshow(train_images[n])
    ax.axis('off')
    ax.set_title(f"Image {n}")

plt.tight_layout()
plt.show()

In [None]:
# Share of labels in the training set: one line per class with its fraction
# of the training samples, rounded to two decimals.
label_counts = Counter(train_labels.ravel())
n_train = len(train_labels)

for label in sorted(label_counts):
    print(label, np.round(label_counts[label] / n_train, 2))

In [None]:
# Wrap the numpy arrays into the dataset objects consumed by tuning, training and evaluation.

BATCH_SIZE = 64
# Class count is taken as the largest label value plus one
# (assumes labels are integer-coded starting at 0 - TODO confirm).
NUM_CLASSES = train_labels.ravel().max() + 1

if NUM_CLASSES <= 2:
    print('Binary classification')
else:
    print('Categorical classification - ', NUM_CLASSES)

# create_tf_datasets comes from the project's `functions` module; besides the three
# splits it also returns `train_dataset_sub`, which is what the tuner trains on.
train_dataset, val_dataset, test_dataset, train_dataset_sub = create_tf_datasets(
    train_images,
    train_labels,
    val_images,
    val_labels,
    test_images,
    test_labels,
    NUM_CLASSES,
    BATCH_SIZE,
)

In [None]:
# Model tuning: Bayesian optimisation over the search space declared in build_model,
# maximising the validation weighted F1 score.
def _build_for_tuner(hp):
    """Adapter handed to the tuner: builds a candidate model for the current NUM_CLASSES."""
    return build_model(hp, NUM_CLASSES)


tuning_dir = Path('tuning') / task / 'bayesian_tuning_results'

tuner = kt.BayesianOptimization(
    _build_for_tuner,
    objective=kt.Objective('val_weighted_f1_score', direction="max"),
    max_trials=10,            # number of hyperparameter combinations to try
    executions_per_trial=1,   # each combination is trained once
    directory=tuning_dir,
    project_name='vgg16_medical_bayesian',
    overwrite=True,           # start a fresh search instead of resuming a stored one
)

# Short runs (3 epochs) on train_dataset_sub, scored on the validation set.
tuner.search(train_dataset_sub, validation_data=val_dataset, epochs=3)

In [None]:
# Show the tuner's summary of the completed search trials.
tuner.results_summary()

In [None]:
# Take the top-ranked hyperparameter set from the search and report its values.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

summary_lines = [
    "Best Hyperparameters: ",
    f"Dense Units: {best_hps.get('units')}",
    f"Dropout Rate: {best_hps.get('dropout')}",
    f"Optimizer: {best_hps.get('optimizer')}",
    f"Learning Rate: {best_hps.get('learning_rate')}",
]
print("\n".join(summary_lines))

In [None]:
# Final training run with the best hyperparameters found by the tuner.
best_model = tuner.hypermodel.build(best_hps)

# Stop when validation loss has not improved for 3 consecutive epochs and
# restore the weights from the best (lowest val_loss) epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Save weights to disk every time the validation weighted F1 score reaches a new maximum.
# NOTE(review): the checkpoint tracks val_weighted_f1_score while early stopping tracks
# val_loss, and the checkpoint file is not loaded back anywhere in this notebook -
# confirm that the two different criteria are intentional.
checkpoint_path = Path('medicine') / task / 'best_model.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_weighted_f1_score',
    mode='max',
    save_best_only=True,      # keep only the best-scoring weights
    save_weights_only=True,   # weights only, not the full model (lighter files)
    verbose=1,                # log a message whenever a checkpoint is written
)

# Train for at most 20 epochs on the training data with both callbacks active.
history = best_model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=20,
    callbacks=[checkpoint_callback, early_stopping],
)

In [None]:
# Learning curves of the final training run.
# The tracked metric is the weighted F1 score (not accuracy), so legends, titles and
# axis labels name it explicitly. Variable names are kept as they were.
acc = history.history['weighted_f1_score']          # training weighted F1 per epoch
val_acc = history.history['val_weighted_f1_score']  # validation weighted F1 per epoch
loss = history.history['loss']
val_loss = history.history['val_loss']

# Epochs are numbered from 1 on the x axis.
epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training weighted F1')
plt.plot(epochs, val_acc, 'b', label='Validation weighted F1')
plt.title('Training and validation weighted F1')
plt.xlabel('Epoch')
plt.ylabel('Weighted F1')
plt.legend()

# Second figure: loss curves.
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Evaluate the trained model on the test dataset and print the returned metric values.
# Stored as `test_metrics` rather than `eval`, so the built-in eval() is not shadowed
# for the rest of the kernel session.
test_metrics = best_model.evaluate(test_dataset, verbose=1)
print(test_metrics)

In [None]:
# Run inference on the test dataset and turn the raw model outputs into class indices.
pred = best_model.predict(test_dataset, verbose=1)

predicted_class_indices = (
    # Multi-class: index of the highest-scoring output per sample.
    np.argmax(pred, axis=1)
    if NUM_CLASSES > 2
    # Binary: threshold the output at 0.5 and flatten to one label per sample.
    else np.where(pred > 0.5, 1, 0).ravel()
)


In [None]:
# Distribution of the raw model outputs on the test dataset.
# histplot returns the Axes it drew on; it is used to add a title and axis labels
# so the figure can be read on its own.
ax = sns.histplot(pred, bins=100, kde=True)
ax.set(
    title='Distribution of model outputs on the test set',
    xlabel='Model output',
    ylabel='Count',
)
plt.show()

In [None]:
# Test-set performance: overall accuracy, confusion matrix and per-class report.
y_true = test_labels.ravel()
y_pred = predicted_class_indices

test_accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {test_accuracy * 100:0.1f}%")

print('\n---Confusion matrix---')
conf_matrix = confusion_matrix(y_true, y_pred)
print(conf_matrix)

print('\n---Classification report---')
class_report = classification_report(y_true, y_pred)
print(class_report)

In [18]:
# One row per test image: the task name (repeated on every row) and the predicted label.
# The index is named so it is written out as the image id column.
results = pd.DataFrame(
    {"task_name": task, "label": predicted_class_indices}
).rename_axis("id_image_in_task")

In [19]:
# Write the predictions to classification/<task>/results.csv.
directory = f"classification/{task}"
file_path = f"{directory}/results.csv"

# Create the output folder if it is missing; an existing folder is left untouched.
os.makedirs(directory, exist_ok=True)
results.to_csv(file_path)