In [1]:
import os
import numpy as np
from tensorflow.keras.applications import NASNetLarge
from tensorflow.keras.applications.nasnet import preprocess_input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from PIL import Image

def log_and_print(message):
    """Emit ``message`` on standard output via ``print``."""
    print(message)

def load_dataset(input_dir, target_size=(331, 331)):
    """
    Load and preprocess images and labels from the dataset directory.

    Every immediate subdirectory of ``input_dir`` is treated as one class; the
    subdirectory name is used as the label for each image file found directly
    inside it. Files that cannot be opened or decoded are reported and skipped.

    Args:
        input_dir (str): Path to the dataset directory.
        target_size (tuple): Target size passed to ``Image.resize``.
    Returns:
        np.array, np.array: Arrays of images and corresponding labels.
    Raises:
        ValueError: If ``input_dir`` has no subdirectories, or if no image
            could be loaded from any of them.
    """
    image_extensions = ('.jpg', '.png', '.jpeg')

    # Sort so the load order (and everything derived from it) is reproducible;
    # os.listdir makes no ordering guarantee.
    class_dirs = sorted(
        path
        for path in (os.path.join(input_dir, entry) for entry in os.listdir(input_dir))
        if os.path.isdir(path)
    )

    if not class_dirs:
        raise ValueError(f"No subdirectories found in {input_dir}. Please check the dataset structure.")

    images = []
    labels = []
    for class_dir in class_dirs:
        class_name = os.path.basename(class_dir)
        for file_name in sorted(os.listdir(class_dir)):
            # Compare case-insensitively so files such as "IMG_001.JPG" are not skipped.
            if not file_name.lower().endswith(image_extensions):
                continue
            img_path = os.path.join(class_dir, file_name)
            try:
                # The context manager closes the underlying file handle promptly.
                with Image.open(img_path) as img:
                    # Force three channels: grayscale / RGBA / palette images would
                    # otherwise yield arrays of a different shape and make the
                    # stacked array ragged.
                    pixels = np.array(img.convert('RGB').resize(target_size))
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not abort the load.
                log_and_print(f"Error processing {img_path}: {e}")
                continue
            images.append(pixels)
            labels.append(class_name)

    if not images or not labels:
        raise ValueError("No images or labels were loaded. Please check the dataset directory and file extensions.")

    images = np.array(images)
    labels = np.array(labels)
    log_and_print(f"Loaded {len(images)} images across {len(set(labels))} classes.")
    return images, labels

# 2. Extract deep features
def extract_features(images, model):
    """
    Compute deep features for a batch of images.

    Args:
        images: Image array handed to ``preprocess_input``.
        model: Object exposing ``predict``; called on the preprocessed images.
    Returns:
        Whatever ``model.predict`` returns for the preprocessed images.
    """
    return model.predict(preprocess_input(images))

# 3. Apply SMOTE to balance features
def apply_smote(features, labels, k_neighbors=5, random_state=42):
    """
    Oversample the minority classes of a feature set with SMOTE.

    Args:
        features (np.array): Feature array; the first axis indexes samples and
            all remaining axes are flattened before resampling.
        labels (array-like): One class label per sample.
        k_neighbors (int): Upper bound on the number of neighbours SMOTE uses.
            It is lowered automatically when the smallest minority class has
            too few samples to supply that many neighbours.
        random_state (int): Seed passed to SMOTE so repeated runs produce the
            same synthetic samples.
    Returns:
        np.array, np.array: Resampled 2-D features and the matching labels as
        integer codes produced by ``LabelEncoder``.
    Raises:
        ValueError: If a minority class has fewer than two samples, since no
            neighbour exists to interpolate towards.
    """
    features = features.reshape(features.shape[0], -1)  # Flatten features for SMOTE
    labels_encoded = LabelEncoder().fit_transform(labels)

    # Only classes smaller than the largest one are oversampled; each of them
    # needs at least k_neighbors + 1 members for the neighbour search.
    _, class_counts = np.unique(labels_encoded, return_counts=True)
    if class_counts.size:
        minority_counts = class_counts[class_counts < class_counts.max()]
    else:
        minority_counts = class_counts
    if minority_counts.size:
        smallest = int(minority_counts.min())
        if smallest < 2:
            raise ValueError(
                "SMOTE needs at least 2 samples in every minority class; "
                f"the smallest class has {smallest}."
            )
        k_neighbors = min(k_neighbors, smallest - 1)

    smote = SMOTE(k_neighbors=k_neighbors, random_state=random_state)
    features_smote, labels_smote = smote.fit_resample(features, labels_encoded)
    log_and_print(f"Applied SMOTE: Original size = {len(features)}, Augmented size = {len(features_smote)}")
    return features_smote, labels_smote

# 4. Build and train classifier
def build_classifier(input_shape, num_classes):
    """
    Assemble and compile the dense classification head.

    Args:
        input_shape: Shape of one input sample, passed to the ``Flatten`` layer.
        num_classes: Number of units in the final softmax layer.
    Returns:
        A compiled ``Sequential`` model (Adam, categorical cross-entropy,
        accuracy metric).
    """
    network = Sequential()
    network.add(Flatten(input_shape=input_shape))

    # Two hidden stages, each a ReLU dense layer followed by 50% dropout.
    for units in (512, 256):
        network.add(Dense(units, activation='relu'))
        network.add(Dropout(0.5))

    network.add(Dense(num_classes, activation='softmax'))

    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def main():
    """
    Run the whole pipeline end to end.

    Steps: load the image dataset, split it into train/test sets, extract
    NASNetLarge features, balance the training features with SMOTE, train the
    dense classifier with early stopping, and save the trained model.
    Recoverable problems (unreadable dataset, too little data, failed split)
    are logged and end the run early instead of raising.
    """
    INPUT_DIR = "/kaggle/input/cropped-dataset-clean/cropped_dataset (())"
    TARGET_SIZE = (331, 331)
    OUTPUT_DIR = "/kaggle/working/final_model"

    # Step 1: Load dataset
    try:
        images, labels = load_dataset(INPUT_DIR, target_size=TARGET_SIZE)
    except (ValueError, OSError) as e:
        # OSError covers a missing or unreadable INPUT_DIR (raised by os.listdir).
        log_and_print(str(e))
        return

    # Shuffle and split the dataset
    images, labels = shuffle(images, labels, random_state=42)

    if len(images) < 10:
        log_and_print("Dataset is too small to split. Please ensure the dataset has enough samples.")
        return

    try:
        X_train, X_test, y_train, y_test = train_test_split(
            images, labels, test_size=0.2, random_state=42, stratify=labels
        )
    except ValueError as e:
        # A stratified split fails when some class has too few members.
        log_and_print(str(e))
        return

    if len(X_train) == 0 or len(X_test) == 0:
        log_and_print("Train or test set is empty. Adjust `test_size` or add more data.")
        return

    # Step 2: Extract deep features using NASNetLarge
    # Derive the network input shape from TARGET_SIZE so the two cannot drift
    # apart. PIL sizes are (width, height); image arrays are (height, width, 3).
    base_model = NASNetLarge(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(TARGET_SIZE[1], TARGET_SIZE[0], 3),
    )
    feature_model = Model(inputs=base_model.input, outputs=base_model.output)

    log_and_print("Extracting deep features...")
    train_features = extract_features(X_train, feature_model)
    test_features = extract_features(X_test, feature_model)

    # Step 3: Apply SMOTE to balance training features
    smote_features, smote_labels = apply_smote(train_features, y_train)

    # Step 4: Convert labels to one-hot encoding
    # apply_smote returns integer codes from a LabelEncoder fitted on y_train.
    # Fitting another encoder on the same original labels reproduces that
    # mapping (classes are sorted), so the test labels can be encoded
    # consistently. Fitting on the already-integer SMOTE labels instead would
    # make transform(y_test) fail with "previously unseen labels".
    label_encoder = LabelEncoder()
    label_encoder.fit(y_train)
    num_classes = len(label_encoder.classes_)

    # Pin num_classes so train and test one-hot matrices always share a width,
    # even if the test split lacks the highest-numbered class.
    smote_labels_one_hot = to_categorical(smote_labels, num_classes=num_classes)

    test_labels_encoded = label_encoder.transform(y_test)
    test_labels_one_hot = to_categorical(test_labels_encoded, num_classes=num_classes)

    # Step 5: Build and train classifier
    log_and_print("Building and training classifier...")
    classifier = build_classifier(input_shape=smote_features.shape[1:], num_classes=num_classes)

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # NOTE(review): the test split doubles as the early-stopping validation set,
    # so the reported val metrics are optimistic; a separate hold-out split
    # would be needed for an unbiased estimate.
    classifier.fit(
        smote_features,
        smote_labels_one_hot,
        validation_data=(test_features, test_labels_one_hot),
        epochs=50,
        batch_size=32,
        callbacks=[early_stopping]
    )

    # Step 6: Save model
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    model_path = os.path.join(OUTPUT_DIR, "facial_expression_model.h5")
    classifier.save(model_path)
    log_and_print(f"Model saved at {model_path}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


No images or labels were loaded. Please check the dataset directory and file extensions.
