In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
from tqdm import tqdm

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve

from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import Sequence
from tensorflow.keras import layers, models, callbacks, Model
from tensorflow.keras.applications import EfficientNetV2B0, DenseNet121
import cv2


# Detect and list GPUs
gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs detected: {gpus}")

# Optionally, set memory growth to avoid OOM errors
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

#  use all GPUs for inference (not just the first one)
if len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy()
    print(f"Using MirroredStrategy with {strategy.num_replicas_in_sync} GPUs")
else:
    strategy = tf.distribute.get_strategy()

# Parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 16  # Adjust if needed

2025-07-25 15:04:20.796083: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-25 15:04:20.803720: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753473860.812176   42317 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753473860.814665   42317 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1753473860.821259   42317 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

GPUs detected: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Using MirroredStrategy with 2 GPUs


I0000 00:00:1753473862.270916   42317 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 21024 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090 Ti, pci bus id: 0000:02:00.0, compute capability: 8.6
I0000 00:00:1753473862.272300   42317 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 21255 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:01:00.0, compute capability: 8.6


In [2]:
model = tf.keras.models.load_model('burn_triage_cnn.h5', compile=False)


In [3]:
def advanced_preprocess_image(image_path, target_size=IMG_SIZE):
    try:
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Could not load image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]
        aspect_ratio = w / h
        if aspect_ratio > 1:
            new_w = target_size[0]
            new_h = int(target_size[0] / aspect_ratio)
        else:
            new_h = target_size[1]
            new_w = int(target_size[1] * aspect_ratio)
        img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
        delta_w = target_size[0] - new_w
        delta_h = target_size[1] - new_h
        top, bottom = delta_h // 2, delta_h - (delta_h // 2)
        left, right = delta_w // 2, delta_w - (delta_w // 2)
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0)
        # CLAHE (optional if you used in training)
        lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        lab[:, :, 0] = clahe.apply(lab[:, :, 0])
        img = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
        img = img.astype(np.float32) / 255.0
        return img
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return np.zeros((*target_size, 3), dtype=np.float32)


In [4]:
df_unknown = pd.read_csv("burns_unknown_degree.csv")
print(f"Number of unknown burn images: {len(df_unknown)}")


Number of unknown burn images: 1227


In [5]:
def predict_images(df, model, threshold=0.3):
    all_preds = []
    all_probs = []
    for start in tqdm(range(0, len(df), BATCH_SIZE)):
        end = min(start + BATCH_SIZE, len(df))
        batch_files = df['filepath'].iloc[start:end]
        batch_imgs = np.stack([advanced_preprocess_image(f) for f in batch_files])
        batch_probs = model.predict(batch_imgs, verbose=0)
        all_probs.extend(batch_probs.flatten())
        batch_preds = (batch_probs > threshold).astype(int).flatten()
        all_preds.extend(batch_preds)
    return np.array(all_preds), np.array(all_probs)


In [6]:
pred_labels, pred_probs = predict_images(df_unknown, model, threshold=0.3)


I0000 00:00:1753473862.897456   42445 service.cc:152] XLA service 0x7a23640067f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1753473862.897475   42445 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3090 Ti, Compute Capability 8.6
I0000 00:00:1753473862.897477   42445 service.cc:160]   StreamExecutor device (1): NVIDIA GeForce RTX 3090, Compute Capability 8.6
2025-07-25 15:04:22.901503: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1753473862.920437   42445 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1753473863.273512   42445 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
100%|████████████████████████████████████████████████████████| 77/77 [00:06<00:00, 11.77it/s]


In [7]:
df_unknown['predicted_label'] = pred_labels
df_unknown['predicted_prob'] = pred_probs
df_unknown['predicted_label_str'] = df_unknown['predicted_label'].map({0: "1st/2nd degree", 1: "3rd degree"})

# Save to CSV for review or merging
df_unknown.to_csv("burns_unknown_labeled.csv", index=False)
print("Saved: burns_unknown_labeled.csv")
print(df_unknown['predicted_label_str'].value_counts())


Saved: burns_unknown_labeled.csv
predicted_label_str
3rd degree        1160
1st/2nd degree      67
Name: count, dtype: int64


In [8]:
print("\nLow confidence cases (<0.7):")
print(df_unknown[df_unknown['predicted_prob'] < 0.7][['filepath', 'predicted_prob', 'predicted_label_str']].head(10))



Low confidence cases (<0.7):
                    filepath  predicted_prob predicted_label_str
2   burn_dataset/img1400.jpg        0.498493          3rd degree
3    burn_dataset/img587.jpg        0.676570          3rd degree
4    burn_dataset/img614.jpg        0.581842          3rd degree
11   burn_dataset/img508.jpg        0.687151          3rd degree
14   burn_dataset/img512.jpg        0.230303      1st/2nd degree
19   burn_dataset/img619.jpg        0.566925          3rd degree
21  burn_dataset/img1183.jpg        0.554728          3rd degree
24   burn_dataset/img671.jpg        0.321389          3rd degree
26   burn_dataset/img693.jpg        0.477440          3rd degree
30  burn_dataset/img1228.jpg        0.537761          3rd degree


In [9]:


# Load new predictions
df_unknown = pd.read_csv('burns_unknown_labeled.csv')

# Load original labeled datasets
df_1and2 = pd.read_csv('burns_1and2.csv')   # original 1st/2nd degree, labeled 0
df_3 = pd.read_csv('burns_3rd.csv')         # original 3rd degree, labeled 1


In [10]:
# Only keep high-confidence (>= 0.7)
high_conf = df_unknown[df_unknown['predicted_prob'] >= 0.7].copy()

print(f"Keeping {len(high_conf)} of {len(df_unknown)} predictions (confidence >= 0.7)")
print(high_conf['predicted_label_str'].value_counts())


Keeping 752 of 1227 predictions (confidence >= 0.7)
predicted_label_str
3rd degree    752
Name: count, dtype: int64


In [11]:
# Ensure consistent columns
df_1and2 = df_1and2.rename(columns={'label': 'binary_label'})
df_3 = df_3.rename(columns={'label': 'binary_label'})

# Standardize all labels to numeric 0/1
df_1and2['binary_label'] = 0  # All 1st/2nd degree
df_3['binary_label'] = 1      # All 3rd degree
high_conf['binary_label'] = high_conf['predicted_label']  # already numeric

# Keep only necessary columns
df_1and2 = df_1and2[['filepath', 'binary_label']]
df_3 = df_3[['filepath', 'binary_label']]
high_conf = high_conf[['filepath', 'binary_label']]







In [12]:
# Combine all (original and new labeled)
df_all = pd.concat([df_1and2, df_3, high_conf], ignore_index=True)

print(f"Final combined dataset size: {len(df_all)}")
print(df_all['binary_label'].value_counts())

# Save
df_all.to_csv('burns_dataset_expanded.csv', index=False)
print("Saved: burns_dataset_expanded.csv")


Final combined dataset size: 6651
binary_label
0    4876
1    1775
Name: count, dtype: int64
Saved: burns_dataset_expanded.csv
