In [4]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import layers, models

from sklearn.utils.class_weight import compute_class_weight

import ipywidgets as widgets
from IPython.display import display
from io import BytesIO
import matplotlib.pyplot as plt

print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
tf.config.list_physical_devices('GPU')

from tensorflow.python.client import device_lib

device_lib.list_local_devices()

tf.test.is_built_with_cuda()

tf.debugging.set_log_device_placement(True)

Num GPUs Available:  1


In [2]:
# Paths
train_img_dir = r"E:\YOLO_format\train\images"
train_label_dir = r"E:\YOLO_format\train\labels"

val_img_dir = r"E:\YOLO_format\valid\images"
val_label_dir= r"E:\YOLO_format\valid\labels"

def load_affectnet_labels(img_dir, label_dir):
    img_paths = []
    labels = []
    for file in tqdm(os.listdir(label_dir)):
        if file.endswith(".txt"):
            label_file = os.path.join(label_dir, file)
            image_file = os.path.join(img_dir, file.replace(".txt", ".jpg"))
            if os.path.exists(image_file):
                with open(label_file, "r") as f:
                    parts = f.readline().strip().split()
                    if len(parts) > 0:
                        expr = int(parts[0])
                        if 0 <= expr <= 7:   # valid emotion labels
                            img_paths.append(image_file)
                            labels.append(expr)
    return img_paths, np.array(labels)

train_paths, train_labels = load_affectnet_labels(train_img_dir, train_label_dir)
val_paths, val_labels = load_affectnet_labels(val_img_dir, val_label_dir)

num_classes = len(np.unique(train_labels))
print(f"Training samples: {len(train_paths)}")
print(f"Validation samples: {len(val_paths)}")
print(f"Classes found: {num_classes}")


100%|███████████████████████████████████████████████████████████████████████████| 17101/17101 [00:17<00:00, 984.99it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5406/5406 [00:04<00:00, 1135.09it/s]

Training samples: 12167
Validation samples: 3546
Classes found: 8





In [7]:
IMG_SIZE = (224, 224)
BATCH = 8
AUTOTUNE = tf.data.AUTOTUNE

def preprocess_image(img_path, label):
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    return img, label

def augment(img, label):
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_brightness(img, 0.15)
    img = tf.image.random_contrast(img, 0.85, 1.15)
    img = tf.image.random_saturation(img, 0.9, 1.1)
    return img, label

def build_dataset(file_paths, labels, training=True):
    ds = tf.data.Dataset.from_tensor_slices((file_paths, labels))
    if training:
        ds = ds.shuffle(len(file_paths))
    ds = ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
    if training:
        ds = ds.map(augment, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH).prefetch(AUTOTUNE)
    return ds

train_ds = build_dataset(train_paths, train_labels, training=True)
val_ds = build_dataset(val_paths, val_labels, training=False)


In [8]:
class_weights_arr = compute_class_weight(
    'balanced',
    classes=np.unique(train_labels),
    y=train_labels
)
class_weights = dict(enumerate(class_weights_arr))
print("Class weights:", class_weights)


Class weights: {0: 0.7506786771964462, 1: 1.0467136958017893, 2: 0.8154825737265415, 3: 0.7912981269510926, 4: 4.650993883792049, 5: 2.044186827956989, 6: 0.9613621997471555, 7: 0.6765458185053381}


In [9]:
def build_model(num_classes, input_shape=(224,224,3)):
    base = tf.keras.applications.MobileNetV2(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet'
    )
    base.trainable = False

    inputs = layers.Input(shape=input_shape)
    x = base(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.35)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.25)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs, outputs)

model = build_model(num_classes)
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d_1   (None, 1280)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dropout_2 (Dropout)         (None, 1280)              0         
                                                                 
 dense_2 (Dense)             (None, 256)               327936    
                                                                 
 dropout_3 (Dropout)         (None, 256)               0   

In [10]:
ckpt = tf.keras.callbacks.ModelCheckpoint("affectnet_best.h5", monitor='val_accuracy', save_best_only=True)
early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
reduce = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

# Train only the classification head first
history1 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=8,
    class_weight=class_weights,
    callbacks=[ckpt, early, reduce]
)

# Fine-tune
model.get_layer('mobilenetv2_1.00_224').trainable = True
for layer in model.layers[:-30]:
    layer.trainable = False

model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

history2 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    class_weight=class_weights,
    callbacks=[ckpt, early, reduce]
)


Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [16]:

class_names = ["neutral", "happy", "sad", "surprise", "fear", "disgust", "anger", "contempt"]

# Creating upload widget
uploader = widgets.FileUpload(accept="image/*", multiple=True)
out = widgets.Output()
display(uploader)
display(out)
display(uploader)

# Handler to run prediction directly after upload
def on_upload_change(change):
    with out:
        out.clear_output()
        for item in change.new:
            # Read the image bytes
            img_bytes = item.content
            img = Image.open(BytesIO(img_bytes)).convert("RGB")

            # Preprocess image
            img = img.resize(IMG_SIZE)
            img_array = np.array(img) / 255.0
            img_array = np.expand_dims(img_array, axis=0)

            # Run model prediction
            preds = model.predict(img_array)
            top_idx = np.argmax(preds[0])
            confidence = preds[0][top_idx] * 100

            # Display result
            print(f" {item.name}")
            print(f"Predicted Emotion: {class_names[top_idx]} ({confidence:.2f}%)")

            plt.figure(figsize=(7, 4))
            plt.bar(class_names, preds[0]*100)
            plt.xticks(rotation=45)
            plt.ylabel("Probability (%)")
            plt.title(f"Prediction for {item.name}")
            plt.show()

# Attach the handler to the widget
uploader.observe(on_upload_change, names='value')


FileUpload(value=(), accept='image/*', description='Upload', multiple=True)

Output()

In [11]:
CLASS_NAMES = ["neutral", "happy", "sad", "surprise", "fear", "disgust", "anger", "contempt"]

def predict_emotion(img_path):
    img = Image.open(img_path).convert('RGB')
    img = img.resize(IMG_SIZE)
    arr = np.array(img)/255.0
    arr = np.expand_dims(arr, 0)
    pred = model.predict(arr)[0]
    top_idx = np.argmax(pred)
    print(f"Predicted: {CLASS_NAMES[top_idx]} ({pred[top_idx]*100:.2f}%)")
    return top_idx

predict_emotion(val_paths[0])


Predicted: contempt (65.38%)


7