In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
from tqdm import tqdm

# Parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 16  # Adjust if needed


In [2]:
model = tf.keras.models.load_model('burn_classification_model_final2.h5', compile=False)


In [3]:
def advanced_preprocess_image(image_path, target_size=IMG_SIZE):
    try:
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Could not load image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]
        aspect_ratio = w / h
        if aspect_ratio > 1:
            new_w = target_size[0]
            new_h = int(target_size[0] / aspect_ratio)
        else:
            new_h = target_size[1]
            new_w = int(target_size[1] * aspect_ratio)
        img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
        delta_w = target_size[0] - new_w
        delta_h = target_size[1] - new_h
        top, bottom = delta_h // 2, delta_h - (delta_h // 2)
        left, right = delta_w // 2, delta_w - (delta_w // 2)
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0)
        # CLAHE (optional if you used in training)
        lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        lab[:, :, 0] = clahe.apply(lab[:, :, 0])
        img = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
        img = img.astype(np.float32) / 255.0
        return img
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return np.zeros((*target_size, 3), dtype=np.float32)


In [4]:
df_unknown = pd.read_csv("burns_unknown_degree.csv")
print(f"Number of unknown burn images: {len(df_unknown)}")


Number of unknown burn images: 1227


In [5]:
def predict_images(df, model, threshold=0.3):
    all_preds = []
    all_probs = []
    for start in tqdm(range(0, len(df), BATCH_SIZE)):
        end = min(start + BATCH_SIZE, len(df))
        batch_files = df['filepath'].iloc[start:end]
        batch_imgs = np.stack([advanced_preprocess_image(f) for f in batch_files])
        batch_probs = model.predict(batch_imgs, verbose=0)
        all_probs.extend(batch_probs.flatten())
        batch_preds = (batch_probs > threshold).astype(int).flatten()
        all_preds.extend(batch_preds)
    return np.array(all_preds), np.array(all_probs)


In [6]:
pred_labels, pred_probs = predict_images(df_unknown, model, threshold=0.3)


100%|██████████| 77/77 [00:05<00:00, 12.91it/s]


In [7]:
df_unknown['predicted_label'] = pred_labels
df_unknown['predicted_prob'] = pred_probs
df_unknown['predicted_label_str'] = df_unknown['predicted_label'].map({0: "1st/2nd degree", 1: "3rd degree"})

# Save to CSV for review or merging
df_unknown.to_csv("burns_unknown_labeled.csv", index=False)
print("Saved: burns_unknown_labeled.csv")
print(df_unknown['predicted_label_str'].value_counts())


Saved: burns_unknown_labeled.csv
predicted_label_str
3rd degree        730
1st/2nd degree    497
Name: count, dtype: int64


In [8]:
print("\nLow confidence cases (<0.7):")
print(df_unknown[df_unknown['predicted_prob'] < 0.7][['filepath', 'predicted_prob', 'predicted_label_str']].head(10))



Low confidence cases (<0.7):
                                             filepath  predicted_prob  \
0   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.030050   
1   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.184415   
2   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.415353   
3   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.018261   
4   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.151950   
5   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.517774   
6   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.266277   
7   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.596577   
9   C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.674850   
11  C:\Users\dabne\.cache\kagglehub\datasets\shubh...        0.055434   

   predicted_label_str  
0       1st/2nd degree  
1       1st/2nd degree  
2           3rd degree  
3       1st/2nd degree  
4       1st/2nd degree  
5           3rd 

In [16]:


# Load new predictions
df_unknown = pd.read_csv('burns_unknown_labeled.csv')

# Load original labeled datasets
df_1and2 = pd.read_csv('burns_1and2.csv')   # original 1st/2nd degree, labeled 0
df_3 = pd.read_csv('burns_3rd.csv')         # original 3rd degree, labeled 1


In [17]:
# Only keep high-confidence (>= 0.7)
high_conf = df_unknown[df_unknown['predicted_prob'] >= 0.7].copy()

print(f"Keeping {len(high_conf)} of {len(df_unknown)} predictions (confidence >= 0.7)")
print(high_conf['predicted_label_str'].value_counts())


Keeping 266 of 1227 predictions (confidence >= 0.7)
predicted_label_str
3rd degree    266
Name: count, dtype: int64


In [18]:
# 1. Ensure consistent columns
df_1and2 = df_1and2.rename(columns={'label': 'binary_label'})
df_3 = df_3.rename(columns={'label': 'binary_label'})

# 2. Standardize all labels to numeric 0/1
df_1and2['binary_label'] = 0  # All 1st/2nd degree
df_3['binary_label'] = 1      # All 3rd degree
high_conf['binary_label'] = high_conf['predicted_label']  # already numeric

# 3. Keep only necessary columns
df_1and2 = df_1and2[['filepath', 'binary_label']]
df_3 = df_3[['filepath', 'binary_label']]
high_conf = high_conf[['filepath', 'binary_label']]

# 4. Combine and save
df_all = pd.concat([df_1and2, df_3, high_conf], ignore_index=True)
df_all.to_csv('burns_dataset_expanded.csv', index=False)
print(df_all['binary_label'].value_counts())
print("Saved: burns_dataset_expanded.csv")






binary_label
0    4876
1    1289
Name: count, dtype: int64
Saved: burns_dataset_expanded.csv


In [19]:
# Combine all (original and new labeled)
df_all = pd.concat([df_1and2, df_3, high_conf], ignore_index=True)

print(f"Final combined dataset size: {len(df_all)}")
print(df_all['binary_label'].value_counts())

# Save
df_all.to_csv('burns_dataset_expanded.csv', index=False)
print("Saved: burns_dataset_expanded.csv")


Final combined dataset size: 6165
binary_label
0    4876
1    1289
Name: count, dtype: int64
Saved: burns_dataset_expanded.csv
