In [1]:
import tensorflow as tf
from tensorflow.keras import layers # type: ignore
from tensorflow.keras.preprocessing import image_dataset_from_directory # type: ignore
import pathlib
from sklearn.metrics import recall_score, accuracy_score, f1_score, precision_score, cohen_kappa_score, roc_auc_score


2025-03-24 16:28:04.083875: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-24 16:28:04.090846: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-24 16:28:04.125249: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-24 16:28:04.214995: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742833684.282518   83434 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742833684.29

In [2]:
batch_size = 32
img_height = 299
img_width  = 299

#Adjust the path according to your machine
data_dir_test  = pathlib.Path('raw_data/data_test/')


test_ds = image_dataset_from_directory(
            data_dir_test,
            image_size=(img_height, img_width),
            batch_size=batch_size)

for image_batch, labels_batch in test_ds:
  print(f"👉The shape of each test batch is {image_batch.shape}")
  print(f"  The shape of each target batch is {labels_batch.shape}")
  break

print("👀 The classes that was used for training the model are:")
for name in test_ds.class_names:
    print(f"- {name}")

num_classes = len(test_ds.class_names)


Found 451 files belonging to 7 classes.


E0000 00:00:1742833693.651781   83434 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1742833693.843906   83434 gpu_device.cc:2341] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


👉The shape of each test batch is (32, 299, 299, 3)
  The shape of each target batch is (32,)
👀 The classes that was used for training the model are:
- cataract
- degeneration
- diabets
- glaucoma
- hypertension
- myopia
- normal


In [3]:
from tensorflow.keras import layers

#Normalization of the image tensors
normalization_layer =   layers.Rescaling(1./255)
normalized_test_ds  =  test_ds.map(lambda x, y: (normalization_layer(x), y))

# Let's test our model

In [4]:
#### # Load the model
model = model = tf.keras.models.load_model("raw_data/Xception-01.keras")


  saveable.load_own_variables(weights_store.get(inner_path))


In [5]:
y_pred_prob = model.predict(normalized_test_ds)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2s/step


In [6]:
import numpy as np

y_true = np.concatenate([y.numpy() for _, y in normalized_test_ds], axis=0)
y_pred = np.argmax(y_pred_prob, axis=1)

2025-03-24 16:28:48.370948: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [7]:
test_ds.class_names

['cataract',
 'degeneration',
 'diabets',
 'glaucoma',
 'hypertension',
 'myopia',
 'normal']

# AUC for multiclass

In [8]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

encoder = OneHotEncoder(sparse_output=False)
y_true_one_hot = encoder.fit_transform(np.array(y_true).reshape(-1, 1))
y_true_one_hot

array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [9]:
n_classes = y_true_one_hot.shape[1]  # Number of classes
auc_scores = []

for i in range(n_classes):
    auc = roc_auc_score(y_true_one_hot[:, i], np.array(y_pred_prob)[:, i], multi_class='ovo')
    auc_scores.append(auc)

print(f"AUC scores for each class: {auc_scores}")

AUC scores for each class: [np.float64(0.31623323852092644), np.float64(0.5427464008859357), np.float64(0.555219620743034), np.float64(0.5822208094935368), np.float64(0.5848072562358277), np.float64(0.5207852193995381), np.float64(0.5112710964239349)]


# See other metrics

In [18]:

results = [['class','accuracy', 'recall', 'f1_score', 'precision', "roc_auc"] ]
for class_id in range(7):
    class_y_true = [1 if label == class_id else 0 for label in y_true]
    class_y_pred = [1 if pred  == class_id else 0 for pred in y_pred ]
    
    class_recall    = recall_score(class_y_true, class_y_pred, average = 'macro')

    class_accuracy  = accuracy_score(class_y_true, class_y_pred)
    
    class_f1        = f1_score(class_y_true, class_y_pred, average = 'macro')

    class_precision = precision_score(class_y_true, class_y_pred, average = 'macro')

    #class_auc = auc_scores[class_id]
    class_auc = roc_auc_score(class_y_true, class_y_pred, average = 'macro')

    results.append([test_ds.class_names[class_id], class_accuracy, class_recall, class_f1, class_precision, class_auc])

print("{:<20} {:<10} {:<10}  {:<10} {:<10} {:<10}".format(*results[0]))

for row in results[1:]:
    print("{:<20} {:<10.2} {:<10.2}  {:<10.2} {:<10.2} {:<10.2}".format(*row))


class                accuracy   recall      f1_score   precision  roc_auc   
cataract             0.89       0.47        0.47       0.47       0.47      
degeneration         0.92       0.53        0.53       0.53       0.53      
diabets              0.69       0.53        0.51       0.56       0.53      
glaucoma             0.87       0.52        0.51       0.51       0.52      
hypertension         0.96       0.49        0.49       0.49       0.49      
myopia               0.92       0.51        0.51       0.51       0.51      
normal               0.52       0.52        0.51       0.52       0.52      


Given that our model has a high accuracy for amolst all classes, it is able to classify some instances. However, it's important to consinder that the dataset is imbalanced. When we look at the results for other metrics, we can see that the model is very close to the baseline. 

# We don't need to upload the test dataset to the clouds. We can create a dict by hand.

In [11]:
classes_dict = {0: 'cataract', 1 :'degeneration', 2:'diabets', 3: 'glaucoma', 4: 'hypertension', 5:'myopia', 6:'normal'}

In [12]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load an image and resize it to the model's expected input size (e.g., 299x299 for Xception)
image = load_img("raw_data/data_test/glaucoma/1365_right.jpg", target_size=(299, 299))  # glaucoma image
image_array_norm = img_to_array(image) / 255.0  # Normalize to [0, 1]
image_array = np.expand_dims(image_array_norm, axis=0)  # Add batch dimension

In [13]:
pred = model.predict(image_array)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


In [14]:
pred

array([[2.2466382e-02, 1.3366773e-03, 4.8659969e-02, 6.9714057e-01,
        4.3341835e-04, 1.9818561e-05, 2.2994313e-01]], dtype=float32)

In [15]:
pred_id = np.argmax(pred, axis=1)[0]

In [16]:
classes_dict[pred_id]

'glaucoma'