In [4]:
import os
import numpy as np
from PIL import Image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, GlobalAveragePooling2D, Dense, Input
from tensorflow.keras.utils import to_categorical

def load_images_with_labels(base_directory, use_subfolders_for_training=True):
    """Load face images under ``base_directory`` as normalized RGB arrays with labels.

    Only ``.png`` / ``.pgm`` files are considered (the extension is matched
    case-insensitively). Each image is converted to RGB, resized to 64x64 and
    divided by 255. The label is 1 when the file name contains "sunglasses"
    (case-insensitive) and 0 otherwise.

    Args:
        base_directory: Directory to scan (``str`` or ``os.PathLike``).
        use_subfolders_for_training: If True, skip the files directly inside
            ``base_directory`` and load only those found in its subfolders
            (at any depth). If False, load only the files directly inside
            ``base_directory`` and do not descend into subfolders.

    Returns:
        Tuple ``(images, labels, file_paths)``. ``images`` is a NumPy array of
        the loaded samples (shape ``(n, 64, 64, 3)`` when ``n > 0``, an empty
        1-D array otherwise), ``labels`` is a NumPy array of 0/1 class ids and
        ``file_paths`` is a list of the source paths. All three are
        index-aligned and ordered deterministically (sorted directory and
        file names). Files that fail to load are reported with ``print`` and
        skipped.
    """
    images = []
    labels = []
    file_paths = []
    label_map = {'no_sunglasses': 0, 'sunglasses': 1}
    image_extensions = ('.png', '.pgm')

    # os.walk yields plain path strings, so normalise a PathLike argument
    # before comparing it with ``root`` below.
    base_directory = os.fspath(base_directory)

    for root, dirs, files in os.walk(base_directory):
        # os.walk visits ``dirs`` in list order; sorting in place makes the
        # traversal (and therefore the sample order) reproducible.
        dirs.sort()

        if use_subfolders_for_training:
            # Skip the main folder level and only process images in subfolders
            if root == base_directory:
                continue
        else:
            # Only process images in the main folder, skip subfolders
            if root != base_directory:
                continue
            # Nothing below the top level is wanted: stop os.walk descending.
            dirs.clear()

        for filename in sorted(files):
            lowered = filename.lower()
            if not lowered.endswith(image_extensions):
                continue

            file_path = os.path.join(root, filename)
            try:
                # The context manager releases the file handle even when
                # decoding fails part-way through.
                with Image.open(file_path) as source:
                    img = source.convert('RGB').resize((64, 64))
                img_array = np.array(img) / 255.0
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not abort
                # the whole load, but it is reported rather than hidden.
                print(f"Error processing {filename}: {e}")
                continue

            # Label assignment based on presence of "sunglasses" in the filename
            if "sunglasses" in lowered:
                label = label_map['sunglasses']
            else:
                label = label_map['no_sunglasses']

            images.append(img_array)
            labels.append(label)
            file_paths.append(file_path)
    return np.array(images), np.array(labels), file_paths

def create_combined_model(input_shape, num_classes):
    """Build and compile a convolutional autoencoder with a classification head.

    The network has one input and two outputs:

    * ``decoded`` - a sigmoid-activated 3-channel reconstruction of the input.
      The encoder halves the spatial size four times and the decoder doubles
      it four times, so the reconstruction has the input's height and width
      when both are multiples of 16 (e.g. 64x64).
    * ``classification`` - a softmax over ``num_classes`` computed from the
      globally average-pooled bottleneck features.

    Args:
        input_shape: Shape of a single input sample, passed to ``Input``.
        num_classes: Number of units in the softmax classification layer.

    Returns:
        The compiled model (Adam optimizer; mean squared error on ``decoded``
        with weight 1.0, categorical cross-entropy on ``classification`` with
        weight 0.5; accuracy tracked for ``classification``).
    """
    print("creating model")
    inputs = Input(shape=input_shape)

    # Encoder: four conv + 2x2 max-pool stages with a growing filter count.
    features = inputs
    for n_filters in (32, 64, 128, 256):
        features = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(features)
        features = MaxPooling2D((2, 2), padding='same')(features)
    encoded = features

    # Decoder: mirror the encoder with 2x upsampling, ending in a 3-channel
    # sigmoid layer that produces the reconstruction.
    reconstruction = encoded
    for n_filters in (256, 128, 64):
        reconstruction = UpSampling2D((2, 2))(reconstruction)
        reconstruction = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(reconstruction)
    reconstruction = UpSampling2D((2, 2))(reconstruction)
    decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same', name='decoded')(reconstruction)

    # Classification head fed by the pooled bottleneck features.
    pooled = GlobalAveragePooling2D()(encoded)
    classification_output = Dense(num_classes, activation='softmax', name='classification')(pooled)

    # Assemble the two-output model and attach one loss per named output.
    model = Model(inputs, [decoded, classification_output])
    losses = {'decoded': 'mean_squared_error', 'classification': 'categorical_crossentropy'}
    weights = {'decoded': 1.0, 'classification': 0.5}
    model.compile(
        optimizer='adam',
        loss=losses,
        loss_weights=weights,
        metrics={'classification': ['accuracy']},
    )
    print("creating model finished")
    return model

In [2]:
# Root folder of the faces dataset; each sub-folder of it is treated as one
# person. Set the FACES_DIR environment variable to point at another copy of
# the data - the fallback is the path this notebook was originally run with.
main_directory = os.environ.get(
    'FACES_DIR',
    'C:/Users/conne_m8uvfz1/OneDrive/Documents/RHIT/head-image-classification/faces',
)

X_train, y_train = [], []
X_test, y_test = [], []
paths_train, paths_test = [], []
print("loading images")
# sorted(): os.listdir returns entries in arbitrary order; a fixed order keeps
# the sample order (and the index alignment with paths_*) reproducible.
for person_folder in sorted(os.listdir(main_directory)):
    person_path = os.path.join(main_directory, person_folder)
    if not os.path.isdir(person_path):
        continue

    # Test split: images stored directly in the person's folder.
    X_test_person, y_test_person, paths_test_person = load_images_with_labels(person_path, use_subfolders_for_training=False)
    X_test.extend(X_test_person)
    y_test.extend(y_test_person)
    paths_test.extend(paths_test_person)

    # Training split: images stored in sub-folders of the person's folder.
    X_train_person, y_train_person, paths_train_person = load_images_with_labels(person_path, use_subfolders_for_training=True)
    X_train.extend(X_train_person)
    y_train.extend(y_train_person)
    paths_train.extend(paths_train_person)
print("loading finished")

# Stack the per-image arrays / labels into single NumPy arrays for Keras.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

loading images
loading finished


In [6]:
if len(X_test) == 0 or len(y_test) == 0:
    print("No test data found. Ensure the main directory contains images for testing.")
elif len(X_train) == 0 or len(y_train) == 0:
    print("No training data found. Ensure the subdirectories contain images for training.")
else:
    # Class ids as produced by load_images_with_labels. The class count is
    # derived from this map so the softmax width, the one-hot labels and the
    # id -> name lookup below can never disagree (a wider softmax could yield
    # an argmax with no entry in glasses_map and raise KeyError).
    glasses_map = {0: 'no_sunglasses', 1: 'sunglasses'}
    num_classes = len(glasses_map)

    # Convert labels to categorical (one-hot) for categorical_crossentropy
    y_train_cat = to_categorical(y_train, num_classes=num_classes)
    y_test_cat = to_categorical(y_test, num_classes=num_classes)
    # Create and compile the model
    combined_model = create_combined_model(input_shape=(64, 64, 3), num_classes=num_classes)
    print("Training model")
    # Train the model: the autoencoder output is trained to reproduce its own
    # input, the classification output is trained on the one-hot labels.
    combined_model.fit(
        X_train,
        {'decoded': X_train, 'classification': y_train_cat},
        batch_size=64,
        epochs=2,
        validation_data=(X_test, {'decoded': X_test, 'classification': y_test_cat})
    )
    print("Training model finished!!")

    # Evaluate and display test predictions
    decoded_imgs, predictions = combined_model.predict(X_test)
    predicted_labels = np.argmax(predictions, axis=1)

    correct = 0
    wrong = 0
    # paths_test, y_test and predicted_labels are index-aligned.
    for path, actual_id, predicted_id in zip(paths_test, y_test, predicted_labels):
        actual = glasses_map[int(actual_id)]
        predicted = glasses_map[int(predicted_id)]
        if actual == predicted:
            correct += 1
        else:
            print(f"Got image wrong: {path}, predicted: {predicted}, actual: {actual}")
            wrong += 1

    print(f"Total Correct: {correct}")
    print(f"Total Wrong: {wrong}")

creating model
creating model finished
Training model
Epoch 1/2
[1m555/555[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 296ms/step - classification_accuracy: 0.6136 - loss: 0.3582 - val_classification_accuracy: 0.9573 - val_loss: 0.0739
Epoch 2/2
[1m555/555[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 288ms/step - classification_accuracy: 0.9326 - loss: 0.0927 - val_classification_accuracy: 0.9941 - val_loss: 0.0154
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 52ms/step
Training model finished!!
Got image wrong: C:/Users/conne_m8uvfz1/OneDrive/Documents/RHIT/head-image-classification/faces\danieln\danieln_right_angry_sunglasses.png, predicted: no_sunglasses, actual: sunglasses
Got image wrong: C:/Users/conne_m8uvfz1/OneDrive/Documents/RHIT/head-image-classification/faces\danieln\danieln_right_angry_sunglasses_2.png, predicted: no_sunglasses, actual: sunglasses
Got image wrong: C:/Users/conne_m8uvfz1/OneDrive/Documents/RHIT/head-image-classific