In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from kerastuner.tuners import RandomSearch

# ----------------------------
# CONFIGURATION
# ----------------------------

# Single source of truth for the pipeline settings:
#   CONFIG['data']     -> dataset locations, input geometry, batch size
#   CONFIG['training'] -> training-loop settings
CONFIG = dict(
    data=dict(
        train_dir=r'D:\collage project\fracture detection comp vision\fracture_detection_computer_vision\src\components\data\processed\train',
        val_dir=r'D:\collage project\fracture detection comp vision\fracture_detection_computer_vision\src\components\data\processed\valid',
        metadata_path=r'D:\collage project\fracture detection comp vision\fracture_detection_computer_vision\src\components\data\processed\processed_metadata.csv',
        image_size=(224, 224),  # (height, width) fed to DenseNet121 in build_model
        batch_size=32,
    ),
    training=dict(
        epochs=10,  # Kept small for initial testing; raise once the pipeline works
    ),
)

# ----------------------------
# DATA LOADER (Custom Generator)
# ----------------------------

class NPYDataGenerator(tf.keras.utils.Sequence):
    """
    Keras ``Sequence`` that serves batches of preprocessed images stored as .npy files.

    Every batch is read from disk on demand in ``__getitem__``, so the whole
    dataset never has to be held in memory at once.

    Args:
        paths: Sequence of file paths, one ``.npy`` file per sample.
        labels: Sequence of labels aligned one-to-one with ``paths``.
        batch_size: Maximum number of samples per batch; must be positive.
        shuffle: When True, the sample order is shuffled on construction and
            again at the end of every epoch.
        **kwargs: Forwarded unchanged to the Keras base-class constructor.

    Raises:
        ValueError: If ``paths`` and ``labels`` differ in length or
            ``batch_size`` is not positive.
    """
    def __init__(self, paths, labels, batch_size=32, shuffle=True, **kwargs):
        # Newer Keras versions expect Sequence subclasses to initialise the base class.
        super().__init__(**kwargs)

        # Coerce to arrays so the fancy indexing in on_epoch_end also works
        # when plain Python lists are passed in.
        self.paths = np.asarray(paths)
        self.labels = np.asarray(labels)
        if len(self.paths) != len(self.labels):
            raise ValueError(
                f"paths and labels must have the same length, "
                f"got {len(self.paths)} and {len(self.labels)}"
            )
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        self.batch_size = batch_size
        self.shuffle = shuffle
        # Apply the initial shuffle (no-op when shuffle is False).
        self.on_epoch_end()

    def __len__(self):
        # Total number of batches per epoch; the last batch may be smaller.
        return int(np.ceil(len(self.paths) / self.batch_size))

    def __getitem__(self, index):
        # Slice out the samples that belong to batch `index`.
        start = index * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.paths[start:stop]
        batch_labels = self.labels[start:stop]

        # Load the image arrays for this batch from disk.
        batch_images = [np.load(p) for p in batch_paths]
        return np.array(batch_images), np.array(batch_labels)

    def on_epoch_end(self):
        # Re-order paths and labels with the same permutation so they stay aligned.
        if self.shuffle:
            idx = np.random.permutation(len(self.paths))
            self.paths = self.paths[idx]
            self.labels = self.labels[idx]


# ----------------------------
# LOAD METADATA
# ----------------------------

def load_metadata():
    """
    Read the metadata CSV and split its rows into training and validation sets.

    The CSV at ``CONFIG['data']['metadata_path']`` is read with pandas and must
    provide a ``path`` and a ``label`` column. A row is assigned to the
    training set when its path contains the substring ``'train'`` and to the
    validation set when it contains ``'valid'``.

    Returns:
        tuple: ``(train_paths, train_labels, val_paths, val_labels)`` as the
        ``.values`` arrays of the corresponding columns.
    """
    df = pd.read_csv(CONFIG['data']['metadata_path'])
    paths = df['path']

    # regex=False matches the text literally instead of as a regular expression.
    # na=False turns missing paths into False; the default NaN would make the
    # mask unusable for boolean indexing.
    is_train = paths.str.contains('train', regex=False, na=False)
    is_val = paths.str.contains('valid', regex=False, na=False)

    train_df = df[is_train]
    val_df = df[is_val]
    return train_df['path'].values, train_df['label'].values, val_df['path'].values, val_df['label'].values


# ----------------------------
# MODEL BUILDER FUNCTION (used by Keras Tuner)
# ----------------------------

def build_model(hp):
    """
    Assemble and compile a DenseNet121-based binary classifier.

    Called by Keras Tuner with a hyperparameter container ``hp``; the head
    width ('dense_units'), dropout probability ('dropout_rate') and Adam
    learning rate ('learning_rate') are all drawn from it.

    Returns the compiled model, which reports accuracy and AUC (named 'auc').
    """
    # ImageNet-pretrained backbone without its fully connected top
    input_dims = tuple(CONFIG['data']['image_size']) + (3,)
    backbone = DenseNet121(include_top=False, weights='imagenet', input_shape=input_dims)

    # Keep the pretrained weights fixed; only the new head is trained
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Hyperparameters for the classification head
    hidden_units = hp.Int('dense_units', min_value=128, max_value=512, step=64)
    drop_prob = hp.Float('dropout_rate', 0.3, 0.7, step=0.1)

    # Head: global pooling -> dense -> dropout -> single sigmoid unit
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(hidden_units, activation='relu')(pooled)
    regularized = Dropout(drop_prob)(hidden)
    prediction = Dense(1, activation='sigmoid')(regularized)  # Binary classification

    classifier = Model(inputs=backbone.input, outputs=prediction)

    # Compile with the tuned learning rate
    step_size = hp.Choice('learning_rate', [1e-4, 1e-5])
    classifier.compile(
        optimizer=Adam(step_size),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
    )
    return classifier


# ----------------------------
# HYPERPARAMETER TUNING
# ----------------------------

def tune_hyperparameters(X_train, y_train, X_val=None, y_val=None):
    """
    Run a Keras Tuner random search on a subset of the training data.

    Args:
        X_train: Array of training sample paths.
        y_train: Array of training labels aligned with ``X_train``.
        X_val: Optional array of validation sample paths.
        y_val: Optional array of validation labels aligned with ``X_val``.
            When either is omitted, a validation set is carved out of the
            training samples that are NOT used for tuning.

    Returns:
        The best hyperparameter set found by the search.

    Note:
        The tuner is created without ``overwrite=True``, so results already
        stored under ``tuner_logs/densenet_fracture`` are reloaded and reused.
        Delete that directory to start a fresh search.
    """
    print("🔍 Starting hyperparameter tuning...")

    batch_size = CONFIG['data']['batch_size']

    # Use a small fraction (20%) of the training data to speed up tuning
    X_sub, X_rest, y_sub, y_rest = train_test_split(
        X_train, y_train, test_size=0.8, random_state=42
    )

    # Without an explicit validation set, validate on a slice of the held-out
    # remainder so tuning and validation samples never overlap.
    if X_val is None or y_val is None:
        _, X_val, _, y_val = train_test_split(
            X_rest, y_rest, test_size=0.25, random_state=42
        )

    # Create tuner
    tuner = RandomSearch(
        build_model,
        objective='val_auc',
        max_trials=5,
        executions_per_trial=1,
        directory='tuner_logs',
        project_name='densenet_fracture'
    )

    # Training batches are shuffled each epoch; validation order stays fixed.
    train_gen = NPYDataGenerator(X_sub, y_sub, batch_size=batch_size)
    val_gen = NPYDataGenerator(X_val, y_val, batch_size=batch_size, shuffle=False)

    # Run the tuner
    tuner.search(train_gen, validation_data=val_gen, epochs=3)

    # Return the best hyperparameters
    return tuner.get_best_hyperparameters(1)[0]


# ----------------------------
# FINAL TRAINING
# ----------------------------

def train_final_model(hp, X_train, y_train, X_val, y_val):
    """
    Fit the model built from ``hp`` on the training set and report validation metrics.

    The best checkpoint (by validation AUC) is written to
    'best_densenet_model.h5' during training and the final model is saved to
    'final_densenet_fracture_model.h5'. Returns the Keras training history.
    """
    network = build_model(hp)

    # Batch generators: shuffled for training, fixed order for validation
    per_batch = CONFIG['data']['batch_size']
    training_batches = NPYDataGenerator(X_train, y_train, batch_size=per_batch)
    validation_batches = NPYDataGenerator(X_val, y_val, batch_size=per_batch, shuffle=False)

    # Stop when validation AUC stalls, and keep the best checkpoint on disk
    stop_early = EarlyStopping(monitor='val_auc', patience=3, mode='max', restore_best_weights=True)
    keep_best = ModelCheckpoint('best_densenet_model.h5', monitor='val_auc', save_best_only=True, mode='max')

    # Fit the model
    history = network.fit(
        training_batches,
        validation_data=validation_batches,
        epochs=CONFIG['training']['epochs'],
        callbacks=[stop_early, keep_best],
    )

    # Report metrics on the validation set
    print("✅ Final evaluation on validation set:")
    loss, acc, auc = network.evaluate(validation_batches)
    print(f"Validation Loss: {loss:.4f}, Accuracy: {acc:.4f}, AUC: {auc:.4f}")

    # Persist the trained model
    network.save("final_densenet_fracture_model.h5")

    return history


# ----------------------------
# PLOTTING
# ----------------------------

def plot_training_curves(history):
    """
    Draw side-by-side accuracy and AUC curves (train vs validation).

    The figure is saved to 'training_curves_densenet.png' and then shown.
    """
    recorded = history.history

    # One row per panel:
    # (train key, val key, train label, val label, title, y-axis label)
    panels = (
        ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
         "Training vs Validation Accuracy", "Accuracy"),
        ('auc', 'val_auc', 'Train AUC', 'Validation AUC',
         "Training vs Validation AUC", "AUC"),
    )

    plt.figure(figsize=(12, 5))

    for slot, (train_key, val_key, train_label, val_label, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(recorded[train_key], label=train_label)
        plt.plot(recorded[val_key], label=val_label)
        plt.title(title)
        plt.xlabel("Epoch")
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.savefig("training_curves_densenet.png")
    plt.show()


# ----------------------------
# MAIN PIPELINE
# ----------------------------

if __name__ == '__main__':
    # Step 1: Read the metadata CSV and get train/validation paths and labels
    X_train, y_train, X_val, y_val = load_metadata()

    # Step 2: Run the Keras Tuner search and keep the best hyperparameter set
    best_hp = tune_hyperparameters(X_train, y_train)

    # Step 3: Build and train the final model with those hyperparameters
    # (this also evaluates on the validation set and saves the model files)
    history = train_final_model(best_hp, X_train, y_train, X_val, y_val)

    # Step 4: Plot accuracy and AUC curves from the returned training history
    plot_training_curves(history)


🔍 Starting hyperparameter tuning...
Reloading Tuner from tuner_logs\densenet_fracture\tuner0.json


NameError: name 'val_gen' is not defined