In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from collections import Counter
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set paths
dataset_path = r"C:\Users\abdhe\OneDrive\Documents\GitHub\BloodMark\dataset"
save_path = r"C:\Users\abdhe\OneDrive\Documents\GitHub\BloodMark\model\model.h5"
BATCH_SIZE = 32
IMG_SIZE = (64, 64)
EPOCHS = 50

# Step 1: Load and preprocess dataset
logger.info("Loading dataset...")
dataset = tf.keras.utils.image_dataset_from_directory(
    dataset_path,
    labels="inferred",
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42
)

class_names = dataset.class_names
logger.info(f"Classes found: {class_names}")

# Step 2: Check class distribution
class_counts = Counter()
for _, labels in dataset.unbatch():
    class_counts[int(labels.numpy())] += 1

logger.info("Class Distribution:")
for i, count in class_counts.items():
    logger.info(f"{class_names[i]}: {count}")

def plot_class_distribution(class_names, class_counts, title="Class Distribution"):
    classes = [class_names[i] for i in class_counts.keys()]
    counts = [class_counts[i] for i in class_counts.keys()]
    plt.figure(figsize=(10, 6))
    plt.bar(classes, counts)
    plt.title(title)
    plt.xlabel("Class")
    plt.ylabel("Number of Images")
    plt.savefig("class_distribution.png")
    plt.close()

plot_class_distribution(class_names, class_counts, "Original Class Distribution")

# Step 3: Compute class weights for imbalanced classes (Fixed)
# Extract labels as NumPy array to avoid Tensor unhashable error
labels = np.array([label.numpy() for _, label in dataset.unbatch()])
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(list(class_counts.keys())),
    y=labels
)
class_weights = dict(enumerate(class_weights))
logger.info(f"Class weights: {class_weights}")

# Step 4: Data augmentation
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2),
    tf.keras.layers.RandomBrightness(0.2),
    tf.keras.layers.RandomContrast(0.2),
])

# Step 5: Split dataset
train_ratio, val_ratio = 0.7, 0.2
dataset_size = sum(1 for _ in dataset.unbatch())
train_size = int(train_ratio * dataset_size)
val_size = int(val_ratio * dataset_size)
test_size = dataset_size - train_size - val_size

train_dataset = dataset.take(train_size // BATCH_SIZE)
val_dataset = dataset.skip(train_size // BATCH_SIZE).take(val_size // BATCH_SIZE)
test_dataset = dataset.skip((train_size + val_size) // BATCH_SIZE)

# Optimize data pipeline
train_dataset = train_dataset.map(lambda x, y: (data_augmentation(x, training=True), y), num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.cache().shuffle(1000).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.cache().prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.cache().prefetch(tf.data.AUTOTUNE)

logger.info(f"Dataset sizes - Train: {train_size}, Validation: {val_size}, Test: {test_size}")

# Step 6: Define improved model
def create_optimized_model(num_classes):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Rescaling(1./255, input_shape=(64, 64, 3)),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Dropout(0.25),

        tf.keras.layers.Conv2D(64, (3, 3), padding='same', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Dropout(0.25),

        tf.keras.layers.Conv2D(128, (3, 3), padding='same', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Dropout(0.3),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

model = create_optimized_model(len(class_names))
model.summary()

# Step 7: Callbacks
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
checkpoint = tf.keras.callbacks.ModelCheckpoint(save_path, monitor='val_accuracy', save_best_only=True, verbose=1)

# Step 8: Train the model
logger.info("Starting model training...")
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    class_weight=class_weights,
    callbacks=[reduce_lr, early_stop, checkpoint],
    verbose=1
)

# Step 9: Evaluate the model
test_loss, test_accuracy = model.evaluate(test_dataset)
logger.info(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

# Step 10: Plot accuracy and loss
def plot_training_history(history):
    plt.figure(figsize=(12, 4))
    
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.savefig("training_history.png")
    plt.close()

plot_training_history(history)

# Step 11: Confusion matrix and classification report
y_true = []
y_pred = []
for images, labels in test_dataset:
    predictions = model.predict(images, verbose=0)
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(predictions, axis=1))

print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=class_names))

plt.figure(figsize=(10, 8))
conf_matrix = confusion_matrix(y_true, y_pred)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.savefig("confusion_matrix.png")
plt.close()

# Step 12: Save misclassified images for analysis
misclassified_dir = os.path.join(os.path.dirname(save_path), "misclassified")
os.makedirs(misclassified_dir, exist_ok=True)

for images, labels in test_dataset.unbatch():
    image = images.numpy()
    label = int(labels.numpy())
    pred = np.argmax(model.predict(np.expand_dims(image, axis=0), verbose=0))
    if pred != label:
        img_path = os.path.join(misclassified_dir, f"{class_names[label]}_pred_{class_names[pred]}_{np.random.randint(10000)}.png")
        tf.keras.utils.save_img(img_path, image)  # Updated to use tf.keras.utils.save_img
        logger.info(f"Saved misclassified image: {img_path}")

# Step 13: Save the final model
model.save(save_path)
logger.info(f"Model saved at {save_path}")

Found 8000 files belonging to 8 classes.


  super().__init__(**kwargs)


Epoch 1/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342ms/step - accuracy: 0.1345 - loss: 15.5672
Epoch 1: val_accuracy improved from -inf to 0.13000, saving model to C:\Users\abdhe\OneDrive\Documents\GitHub\BloodMark\model\model.h5




[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 381ms/step - accuracy: 0.1345 - loss: 15.5364 - val_accuracy: 0.1300 - val_loss: 5.6173 - learning_rate: 0.0010
Epoch 2/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 364ms/step - accuracy: 0.1424 - loss: 4.8232
Epoch 2: val_accuracy improved from 0.13000 to 0.18500, saving model to C:\Users\abdhe\OneDrive\Documents\GitHub\BloodMark\model\model.h5




[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 387ms/step - accuracy: 0.1425 - loss: 4.8196 - val_accuracy: 0.1850 - val_loss: 3.2109 - learning_rate: 0.0010
Epoch 3/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 364ms/step - accuracy: 0.1664 - loss: 2.9228
Epoch 3: val_accuracy improved from 0.18500 to 0.19312, saving model to C:\Users\abdhe\OneDrive\Documents\GitHub\BloodMark\model\model.h5




[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 385ms/step - accuracy: 0.1664 - loss: 2.9215 - val_accuracy: 0.1931 - val_loss: 2.3564 - learning_rate: 0.0010
Epoch 4/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 364ms/step - accuracy: 0.1862 - loss: 2.2997
Epoch 4: val_accuracy did not improve from 0.19312
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 382ms/step - accuracy: 0.1862 - loss: 2.2993 - val_accuracy: 0.1181 - val_loss: 2.4402 - learning_rate: 0.0010
Epoch 5/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 368ms/step - accuracy: 0.1816 - loss: 2.1296
Epoch 5: val_accuracy did not improve from 0.19312
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 384ms/step - accuracy: 0.1816 - loss: 2.1294 - val_accuracy: 0.1181 - val_loss: 3.0488 - learning_rate: 0.0010
Epoch 6/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 362ms/step - accuracy: 0.1663 - loss: 2.089



[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 373ms/step - accuracy: 0.2078 - loss: 2.0195 - val_accuracy: 0.1963 - val_loss: 2.0390 - learning_rate: 0.0010
Epoch 9/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 388ms/step - accuracy: 0.2112 - loss: 2.0069
Epoch 9: val_accuracy did not improve from 0.19625
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 407ms/step - accuracy: 0.2111 - loss: 2.0069 - val_accuracy: 0.1106 - val_loss: 4.0711 - learning_rate: 0.0010
Epoch 10/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 378ms/step - accuracy: 0.1956 - loss: 2.0097
Epoch 10: val_accuracy did not improve from 0.19625
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 394ms/step - accuracy: 0.1956 - loss: 2.0098 - val_accuracy: 0.1294 - val_loss: 2.5356 - learning_rate: 0.0010
Epoch 11/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step - accuracy: 0.2010 - loss: 2.



[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 372ms/step - accuracy: 0.2012 - loss: 2.0000 - val_accuracy: 0.2456 - val_loss: 1.9786 - learning_rate: 0.0010
Epoch 13/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 356ms/step - accuracy: 0.2187 - loss: 1.9764
Epoch 13: val_accuracy did not improve from 0.24563
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 375ms/step - accuracy: 0.2187 - loss: 1.9764 - val_accuracy: 0.1737 - val_loss: 1.9767 - learning_rate: 0.0010
Epoch 14/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 354ms/step - accuracy: 0.2062 - loss: 1.9868
Epoch 14: val_accuracy did not improve from 0.24563
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 374ms/step - accuracy: 0.2062 - loss: 1.9868 - val_accuracy: 0.1287 - val_loss: 2.1775 - learning_rate: 0.0010
Epoch 15/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step - accuracy: 0.2003 - loss: 



[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 361ms/step - accuracy: 0.2280 - loss: 1.9150 - val_accuracy: 0.2606 - val_loss: 1.8129 - learning_rate: 5.0000e-04
Epoch 23/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 317ms/step - accuracy: 0.2161 - loss: 1.9372
Epoch 23: val_accuracy did not improve from 0.26063
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 332ms/step - accuracy: 0.2161 - loss: 1.9372 - val_accuracy: 0.2163 - val_loss: 1.9554 - learning_rate: 5.0000e-04
Epoch 24/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 324ms/step - accuracy: 0.2023 - loss: 1.9401
Epoch 24: val_accuracy did not improve from 0.26063
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 339ms/step - accuracy: 0.2023 - loss: 1.9401 - val_accuracy: 0.1181 - val_loss: 2.8811 - learning_rate: 5.0000e-04
Epoch 25/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 334ms/step - accuracy: 0.2



[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 384ms/step - accuracy: 0.2290 - loss: 1.8716 - val_accuracy: 0.2612 - val_loss: 1.7497 - learning_rate: 6.2500e-05
Epoch 45/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 344ms/step - accuracy: 0.2314 - loss: 1.8535
Epoch 45: val_accuracy did not improve from 0.26125
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 361ms/step - accuracy: 0.2314 - loss: 1.8535 - val_accuracy: 0.1650 - val_loss: 2.4480 - learning_rate: 6.2500e-05
Epoch 46/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363ms/step - accuracy: 0.2353 - loss: 1.8546
Epoch 46: val_accuracy did not improve from 0.26125
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 383ms/step - accuracy: 0.2352 - loss: 1.8546 - val_accuracy: 0.1813 - val_loss: 2.1980 - learning_rate: 6.2500e-05
Epoch 47/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 330ms/step - accuracy: 0.2



[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 345ms/step - accuracy: 0.2309 - loss: 1.8579 - val_accuracy: 0.2738 - val_loss: 1.7634 - learning_rate: 6.2500e-05
Epoch 49/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step - accuracy: 0.2263 - loss: 1.8639
Epoch 49: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 49: val_accuracy did not improve from 0.27375
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 365ms/step - accuracy: 0.2263 - loss: 1.8639 - val_accuracy: 0.2619 - val_loss: 1.7845 - learning_rate: 6.2500e-05
Epoch 50/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 332ms/step - accuracy: 0.2291 - loss: 1.8538
Epoch 50: val_accuracy did not improve from 0.27375
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 348ms/step - accuracy: 0.2292 - loss: 1.8538 - val_accuracy: 0.1225 - val_loss: 3.2120 - learning_rate: 3.1250e-05
Restoring model weights fr

