In [30]:
import tensorflow as tf
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing import image
import numpy as np
import os
import shutil
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [31]:
# ==================== CONFIGURATION ====================
# Image size fed to the network: used as the generators' target_size and as
# the DenseNet121 input_shape (with 3 channels appended).
IMG_HEIGHT, IMG_WIDTH = 224, 224
# Batch size shared by the train / validation / test directory iterators.
BATCH_SIZE = 32
# Passed as `epochs` to the fine-tuning fit. That fit also sets initial_epoch,
# so this is the final epoch index of the whole run, not an additional count.
EPOCHS = 50
# Adam learning rate for the head-only phase; fine-tuning uses INITIAL_LR / 10.
INITIAL_LR = 1e-4

In [43]:
# Paths
# Dataset splits, each read with flow_from_directory. The main cell lists the
# 'skin_diseases' and 'oral_disorder' subfolders of the train and val splits.
train_dir = '/mnt/k/ml/clg_ml/domain_classification/train'
val_dir = '/mnt/k/ml/clg_ml/domain_classification/val'
test_dir = '/mnt/k/ml/clg_ml/domain_classification/test'
# Folder of images to classify. The trailing slash matters: os.path.dirname()
# of this value is the folder itself, so a 'retrain' directory derived from it
# lives inside this folder rather than next to it.
google_images_dir = '/mnt/k/ml/clg_ml/imgs_from_google/domainClassifier/'
# File the ModelCheckpoint callback writes to and the best model is loaded from.
MODEL_PATH = 'domain_classifier_best.h5'

In [33]:
# ==================== DATA PREPARATION ====================
def create_data_generators():
    """Build the directory iterators for the train, validation and test splits.

    The training iterator applies random augmentation (rotation, shifts,
    horizontal flip, zoom, shear) on top of 1/255 rescaling and shuffles its
    samples. The validation and test iterators only rescale and keep the
    directory order, so predictions can be aligned with ``.classes``.

    Returns:
        tuple: ``(train_generator, val_generator, test_generator)``.
    """
    # Arguments that are identical for all three iterators.
    shared_flow_args = dict(
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
    )

    augmenting_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        zoom_range=0.2,
        shear_range=0.2,
        fill_mode='nearest'
    )
    rescale_only_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = augmenting_datagen.flow_from_directory(
        train_dir, shuffle=True, **shared_flow_args
    )
    val_generator = rescale_only_datagen.flow_from_directory(
        val_dir, shuffle=False, **shared_flow_args
    )
    test_generator = rescale_only_datagen.flow_from_directory(
        test_dir, shuffle=False, **shared_flow_args
    )

    return train_generator, val_generator, test_generator


In [34]:
# ==================== MODEL ARCHITECTURE ====================
def build_domain_classifier():
    """Create the two-class domain classifier on a frozen DenseNet121 backbone.

    The backbone is loaded with ImageNet weights and without its top, then
    marked non-trainable. The head applied to its output is:
    global average pooling -> batch norm -> Dense(512, relu) -> Dropout(0.5)
    -> batch norm -> Dense(256, relu) -> Dropout(0.3) -> Dense(2, softmax).

    Returns:
        tuple: ``(model, base_model)`` - the full model and the backbone, so
        the caller can unfreeze backbone layers later.
    """
    backbone = DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)
    )
    backbone.trainable = False  # only the head learns until the caller unfreezes

    # Head layers in application order.
    head_layers = [
        GlobalAveragePooling2D(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        BatchNormalization(),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(2, activation='softmax', name='domain_output'),
    ]

    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    return classifier, backbone

In [35]:
# ==================== TRAINING STRATEGY ====================
def get_callbacks():
    """Return a fresh list of training callbacks.

    The list holds, in order:
      * ModelCheckpoint - writes to MODEL_PATH whenever ``val_accuracy`` improves.
      * EarlyStopping - stops after 10 epochs without ``val_loss`` improvement
        and restores the best weights.
      * ReduceLROnPlateau - multiplies the learning rate by 0.2 after 5 epochs
        without ``val_loss`` improvement, never going below 1e-7.
    """
    return [
        ModelCheckpoint(
            MODEL_PATH,
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
            verbose=1
        ),
        EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            verbose=1
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,
            patience=5,
            min_lr=1e-7,
            verbose=1
        ),
    ]

In [36]:
def train_domain_classifier(head_epochs=10, unfreeze_layers=50):
    """Train the domain classifier in two phases and return it with its data.

    Phase 1 trains only the classifier head (backbone frozen) with Adam at
    INITIAL_LR. Phase 2 unfreezes the tail of the backbone and continues with
    Adam at INITIAL_LR / 10 until epoch EPOCHS.

    Args:
        head_epochs: Number of epochs for the head-only phase.
        unfreeze_layers: How many trailing backbone layers are considered for
            fine-tuning in phase 2. BatchNormalization layers among them stay
            frozen.

    Returns:
        tuple: ``(model, train_gen, val_gen, test_gen)``.
    """
    # Create data generators
    train_gen, val_gen, test_gen = create_data_generators()

    # Build model
    model, base_model = build_domain_classifier()

    # Use ONE callback list for both phases. ModelCheckpoint keeps its best
    # score on the instance; a new instance in phase 2 would start from
    # scratch and overwrite the phase-1 checkpoint after its first epoch even
    # if that epoch is worse. EarlyStopping and ReduceLROnPlateau reset
    # themselves at the start of each fit, so sharing them is safe.
    callbacks = get_callbacks()

    # Phase 1: Train only the head
    print("Phase 1: Training classifier head...")
    model.compile(
        optimizer=Adam(INITIAL_LR),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    history_head = model.fit(
        train_gen,
        epochs=head_epochs,
        validation_data=val_gen,
        callbacks=callbacks,
        verbose=1
    )

    # Phase 2: Fine-tune deeper layers
    print("Phase 2: Fine-tuning deeper layers...")

    # Unfreeze the last `unfreeze_layers` backbone layers. An explicit index
    # cutoff is used instead of a [:-n] slice because [:-0] is empty and would
    # leave the whole backbone trainable. BatchNormalization layers stay
    # frozen so their ImageNet statistics are not overwritten by small-batch
    # updates during fine-tuning.
    base_model.trainable = True
    cutoff = max(len(base_model.layers) - unfreeze_layers, 0)
    for index, layer in enumerate(base_model.layers):
        layer.trainable = (
            index >= cutoff and not isinstance(layer, BatchNormalization)
        )

    # Re-compile so the changed trainable flags and lower learning rate apply.
    model.compile(
        optimizer=Adam(INITIAL_LR/10),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Continue epoch numbering after phase 1 (0 if phase 1 ran no epochs).
    first_finetune_epoch = history_head.epoch[-1] + 1 if history_head.epoch else 0

    model.fit(
        train_gen,
        epochs=EPOCHS,
        initial_epoch=first_finetune_epoch,
        validation_data=val_gen,
        callbacks=callbacks,
        verbose=1
    )

    return model, train_gen, val_gen, test_gen

In [38]:
def _unfreeze_backbone_tail(model, n_unfrozen=30):
    """Leave only the last ``n_unfrozen`` backbone layers of ``model`` trainable.

    Two model layouts are supported:
      * nested - the first layer is itself a model exposing ``.layers``;
      * flat - backbone layers are direct members of ``model`` and the head
        starts at the first GlobalAveragePooling2D layer.

    BatchNormalization layers in the backbone are always kept frozen. Head
    layers are not touched.

    Returns:
        int | None: number of backbone layers flagged trainable, or None when
        no backbone could be located (the model is then left unchanged).
    """
    if not getattr(model, 'layers', None):
        return None

    first_layer = model.layers[0]
    if hasattr(first_layer, 'layers'):
        first_layer.trainable = True
        backbone_layers = list(first_layer.layers)
    else:
        # Flat model: model.layers[0] is just the input layer and has no
        # `.layers`, so the backbone is everything before the pooling layer.
        head_start = next(
            (index for index, layer in enumerate(model.layers)
             if isinstance(layer, GlobalAveragePooling2D)),
            None,
        )
        if head_start is None:
            return None
        backbone_layers = list(model.layers[:head_start])

    cutoff = max(len(backbone_layers) - n_unfrozen, 0)
    trainable_count = 0
    for index, layer in enumerate(backbone_layers):
        layer.trainable = (
            index >= cutoff and not isinstance(layer, BatchNormalization)
        )
        trainable_count += int(layer.trainable)
    return trainable_count


def feedback_loop_retraining(
    retrain_dir=os.path.join(os.path.dirname(google_images_dir), 'retrain'),
    model_path=MODEL_PATH,
    min_images=50,
    epochs=10,
    learning_rate=1e-4,
    batch_size=32
):
    """Fine-tune the saved model on doctor-labelled images, then clear them.

    Nothing happens until ``retrain_dir`` holds at least ``min_images`` files
    across its 'oral_disorder' and 'skin_diseases' subfolders. Once it does,
    the model at ``model_path`` is loaded, the tail of its backbone is
    unfrozen, and it is trained on a 90/10 train/validation split of the
    folder. The result is saved back to ``model_path`` and the files in the
    two class subfolders are deleted.

    Args:
        retrain_dir: Folder with one subfolder per class.
        model_path: Model file to load and overwrite.
        min_images: Minimum number of files required to trigger retraining.
        epochs: Number of fine-tuning epochs.
        learning_rate: Adam learning rate.
        batch_size: Batch size for both iterators.

    Returns:
        None.

    Note:
        The callbacks come from get_callbacks(), whose checkpoint writes to
        MODEL_PATH regardless of ``model_path``.
    """
    import glob

    class_folders = ['oral_disorder', 'skin_diseases']

    # Count images in retrain folders (regular files only).
    total_images = sum(
        1
        for folder in class_folders
        for path in glob.glob(os.path.join(retrain_dir, folder, '*'))
        if os.path.isfile(path)
    )
    print(f"🔁 Feedback loop check: Found {total_images} images in retrain folder.")

    if total_images < min_images:
        print(f"⏳ Not enough data to retrain. Need {min_images}, have {total_images}.")
        return

    print(f"\n🚀 Triggering fine-tuning using {total_images} new images...")

    # Load the best model
    model = tf.keras.models.load_model(model_path)

    # Unfreeze last few backbone layers for fine-tuning. The previous code
    # took model.layers[0] as the backbone, which is the input layer for the
    # flat model produced by build_domain_classifier() and raised
    # AttributeError on `.layers`.
    if _unfreeze_backbone_tail(model, n_unfrozen=30) is None:
        print("⚠️ Could not locate the base model; keeping saved trainable flags.")
    else:
        print("✅ Last 30 layers of base model unfrozen for fine-tuning.")

    # Compile
    model.compile(
        optimizer=Adam(learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Create generator for retrain data
    datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.1)

    train_gen = datagen.flow_from_directory(
        retrain_dir,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=batch_size,
        class_mode='categorical',
        subset='training',
        shuffle=True
    )

    val_gen = datagen.flow_from_directory(
        retrain_dir,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation',
        shuffle=False
    )

    # The file count above includes every file, but the generator only picks
    # up the image formats it recognises. Bail out before touching the model
    # file or deleting anything if it found nothing to train on.
    if train_gen.samples == 0:
        print("⚠️ No loadable training images found; retrain folder left untouched.")
        return

    # Retrain
    model.fit(
        train_gen,
        validation_data=val_gen if val_gen.samples else None,
        epochs=epochs,
        callbacks=get_callbacks(),
        verbose=1
    )

    # Save the updated model
    model.save(model_path)
    print(f"💾 Model re-trained and saved to: {model_path}")

    # Clear retrain folder after training. Missing class folders and nested
    # directories are skipped instead of raising after the model was saved.
    for folder in class_folders:
        folder_path = os.path.join(retrain_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            if os.path.isfile(file_path):
                os.remove(file_path)
    print("🧹 Cleared retrain folder after training.")

In [39]:
# ==================== EVALUATION ====================
def evaluate_model(model, test_generator):
    """Print a classification report and confusion matrix for a test iterator.

    Args:
        model: Model exposing ``predict``; it is run over ``test_generator``.
        test_generator: Directory iterator providing ``classes``,
            ``class_indices`` and ``reset()``. Predictions are compared
            position by position with ``classes``, so it must not shuffle.

    Returns:
        tuple: ``(test_accuracy, y_true, y_pred)`` where ``y_true`` and
        ``y_pred`` are integer class indices.
    """
    # Get true labels and predictions
    test_generator.reset()
    y_true = test_generator.classes
    y_pred_probs = model.predict(test_generator)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Order class names by their integer index instead of relying on dict
    # order, and pin the label set so the report still works (and keeps its
    # row order) when a class is absent from both y_true and y_pred.
    class_names = [
        name
        for name, _ in sorted(
            test_generator.class_indices.items(), key=lambda item: item[1]
        )
    ]
    labels = list(range(len(class_names)))

    # Classification report
    print("\n" + "="*50)
    print("CLASSIFICATION REPORT")
    print("="*50)
    print(classification_report(
        y_true, y_pred, labels=labels, target_names=class_names
    ))

    # Confusion matrix (rows: true class, columns: predicted class)
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    print("\nCONFUSION MATRIX:")
    print(cm)

    # Overall accuracy
    test_accuracy = np.sum(y_pred == y_true) / len(y_true)
    print(f"\nOverall Test Accuracy: {test_accuracy:.4f}")

    return test_accuracy, y_true, y_pred

In [40]:
def classify_google_images(model_path, google_images_dir, confidence_threshold=0.8):
    """Classify every image in a folder and queue uncertain ones for retraining.

    Each image is predicted with the model at ``model_path``. When the top
    probability is below ``confidence_threshold`` the user is prompted for the
    correct label and the file is moved to ``retrain/<label>/``. Confident
    predictions are only printed; those files stay where they are.

    Args:
        model_path: Path of the saved Keras model.
        google_images_dir: Folder containing the images to classify.
        confidence_threshold: Minimum top-class probability accepted without
            asking for a manual label.

    Returns:
        None.
    """
    # Load model
    model = tf.keras.models.load_model(model_path)
    # Class names (must match subfolder names exactly, in class-index order)
    class_names = ['oral_disorder', 'skin_diseases']

    # Resize images to whatever the loaded model expects; fall back to
    # 224x224 when the model does not report a fixed 4-D input shape.
    target_size = (224, 224)
    input_shape = getattr(model, 'input_shape', None)
    if (isinstance(input_shape, tuple) and len(input_shape) == 4
            and all(isinstance(dim, int) for dim in input_shape[1:3])):
        target_size = (input_shape[1], input_shape[2])

    # Files to process (sorted for a reproducible order)
    image_files = sorted(
        f for f in os.listdir(google_images_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.webp'))
    )
    print(f"🔍 Found {len(image_files)} images to classify in: {google_images_dir}")

    # Retrain folder base path. os.path.dirname() only strips the last path
    # component: with a trailing slash on google_images_dir this resolves to
    # <google_images_dir>/retrain, without one to a sibling 'retrain' folder.
    retrain_base = os.path.join(os.path.dirname(google_images_dir), 'retrain')
    os.makedirs(retrain_base, exist_ok=True)
    # Ensure subfolders exist
    for class_name in class_names:
        os.makedirs(os.path.join(retrain_base, class_name), exist_ok=True)

    for img_file in image_files:
        img_path = os.path.join(google_images_dir, img_file)

        # Load and preprocess image; one unreadable file must not abort the run.
        try:
            img = image.load_img(img_path, target_size=target_size)
        except (OSError, ValueError) as exc:
            print(f"⚠️  Skipping unreadable image {img_file}: {exc}")
            continue
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0) / 255.0

        # Predict
        predictions = model.predict(img_array, verbose=0)
        predicted_class_index = np.argmax(predictions[0])
        predicted_class = class_names[predicted_class_index]
        confidence = np.max(predictions[0])

        if confidence < confidence_threshold:
            print(f"\n⚠️  LOW CONFIDENCE: {img_file}")
            print(f"Predicted: {predicted_class} ({confidence * 100:.2f}%)")

            # Ask doctor for correct label
            doctor_input = input("Doctor input (oral_disorder / skin_diseases): ").strip().lower()
            while doctor_input not in class_names:
                print("❌ Invalid input. Please enter 'oral_disorder' or 'skin_diseases'.")
                doctor_input = input("Doctor input: ").strip().lower()

            # Move image to retrain folder without overwriting an earlier
            # file that happens to have the same name.
            dest_name = img_file
            dest_path = os.path.join(retrain_base, doctor_input, dest_name)
            stem, ext = os.path.splitext(img_file)
            suffix = 1
            while os.path.exists(dest_path):
                dest_name = f"{stem}_{suffix}{ext}"
                dest_path = os.path.join(retrain_base, doctor_input, dest_name)
                suffix += 1
            shutil.move(img_path, dest_path)
            print(f"✅ Moved to: retrain/{doctor_input}/{dest_name}")
        else:
            print(f"{img_file}: ✅ {predicted_class} ({confidence * 100:.2f}%)")
    print("\n📦 Classification complete. Low-confidence images moved for retraining.")

In [None]:
# ==================== MAIN EXECUTION ====================
if __name__ == "__main__":
    print("Starting Domain Classifier Training...")

    # Per-class counts are raw directory entry counts (os.listdir), taken
    # before any generator is created.
    train_skin_count = len(os.listdir(os.path.join(train_dir, 'skin_diseases')))
    train_oral_count = len(os.listdir(os.path.join(train_dir, 'oral_disorder')))
    print(f"Training samples: {train_skin_count} skin + {train_oral_count} oral")

    val_skin_count = len(os.listdir(os.path.join(val_dir, 'skin_diseases')))
    val_oral_count = len(os.listdir(os.path.join(val_dir, 'oral_disorder')))
    print(f"Validation samples: {val_skin_count} skin + {val_oral_count} oral")

    # Two-phase training; the generators are returned for later reuse.
    training_outputs = train_domain_classifier()
    model, train_gen, val_gen, test_gen = training_outputs

    # First evaluation: the model object returned by training.
    print("\nEvaluating on test set...")
    evaluation = evaluate_model(model, test_gen)
    test_accuracy, y_true, y_pred = evaluation

    # Second evaluation: the checkpoint written to MODEL_PATH during training.
    print("\nLoading best model for final evaluation...")
    best_model = tf.keras.models.load_model(MODEL_PATH)
    final_accuracy, _, _ = evaluate_model(best_model, test_gen)

    print(f"\n🎯 Final Domain Classifier Performance: {final_accuracy:.4f}")

In [45]:
# Classify the downloaded images; predictions below 80% confidence prompt for
# a manual label and are moved into the retrain folder.
classify_google_images(
    model_path='domain_classifier_best.h5',
    google_images_dir='/mnt/k/ml/clg_ml/imgs_from_google/domainClassifier/',
    confidence_threshold=0.8,
)



🔍 Found 12 images to classify in: /mnt/k/ml/clg_ml/imgs_from_google/domainClassifier/
be_ke_le.jpeg: ✅ skin_diseases (100.00%)
be_ke_le2.jpg: ✅ skin_diseases (100.00%)
ecz.webp: ✅ skin_diseases (98.70%)
ecz2.jpg: ✅ skin_diseases (99.31%)
hypo.jpeg: ✅ oral_disorder (100.00%)
hypo2.jpg: ✅ oral_disorder (100.00%)
hypo3.jpg: ✅ oral_disorder (99.90%)
mo_ul.jpeg: ✅ oral_disorder (100.00%)
mo_ul2.jpg: ✅ oral_disorder (99.99%)
mo_ul3.jpg: ✅ oral_disorder (99.97%)
pso.webp: ✅ skin_diseases (99.99%)
pso2.jpg: ✅ skin_diseases (100.00%)

📦 Classification complete. Low-confidence images moved for retraining.


In [42]:
# Fine-tune on the manually labelled images; does nothing until the retrain
# folder holds at least `min_images` files.
retrain_folder = os.path.join(os.path.dirname(google_images_dir), 'retrain')
feedback_loop_retraining(
    retrain_dir=retrain_folder,
    model_path=MODEL_PATH,
    min_images=50,
    epochs=5,
)

🔁 Feedback loop check: Found 0 images in retrain folder.
⏳ Not enough data to retrain. Need 50, have 0.
