## VGG16 MODEL - With FER dataset

/kaggle/input/facialexpressionrecognition

In [92]:
# ====================================
#  IMPORT LIBRARIES
# ====================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# ====================================
#  LOAD DATASET
# ====================================
df = pd.read_csv('/kaggle/input/facialexpressionrecognition/fer2013.csv')

emotion_dict = {
    0: "Angry", 1: "Disgust", 2: "Fear", 3: "Happy",
    4: "Sad", 5: "Surprise", 6: "Neutral"
}

# Convert pixel strings to arrays
img_array = df.pixels.apply(lambda x: np.array(x.split(' ')).reshape(48, 48).astype('float32'))
img_array = np.stack(img_array, axis=0)

# Convert grayscale to RGB
img_features = np.array([cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) for img in img_array])

# Encode labels
le = LabelEncoder()
img_labels = le.fit_transform(df.emotion)
img_labels = to_categorical(img_labels)

# ====================================
#  SPLIT DATA
# ====================================
X_train, X_valid, y_train, y_valid = train_test_split(
    img_features, img_labels,
    test_size=0.2,
    stratify=img_labels,
    random_state=42
)

# Normalize
X_train = X_train / 255.
X_valid = X_valid / 255.

img_shape = X_train.shape[1:]
num_classes = y_train.shape[1]

# ====================================
#  TRANSFER LEARNING: VGG16
# ====================================
base_model = tf.keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=img_shape
)

# Freeze base model layers
for layer in base_model.layers:
    layer.trainable = False

# Custom head
x = base_model.output
x = GlobalAveragePooling2D()(x)
output = Dense(num_classes, activation='softmax', name='out_layer')(x)

model = Model(inputs=base_model.input, outputs=output)
print(model.summary())

# ====================================
#  CALLBACKS
# ====================================
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.00005,
    patience=11,
    verbose=1,
    restore_best_weights=True
)

lr_scheduler = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=7,
    min_lr=1e-7,
    verbose=1
)

callbacks  = [early_stopping, lr_scheduler]

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


None


In [93]:
# ====================================
#  DATA AUGMENTATION
# ====================================
train_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True
)
train_datagen.fit(X_train)

# ====================================
#  TRAINING
# ====================================
batch_size = 32
epochs = 25

optimizer = optimizers.Adam(learning_rate=0.0001)

model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy']
)

history = model.fit(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_valid, y_valid),
    steps_per_epoch=int(len(X_train) / batch_size),
    epochs=epochs,
    callbacks=callbacks
)

# ====================================
#  EVALUATION
# ====================================
score = model.evaluate(X_valid, y_valid, verbose=1)
print("Validation Loss:", score[0])
print("Validation Accuracy:", score[1])

y_pred = model.predict(X_valid)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_valid, axis=1)

print(classification_report(y_true, y_pred_classes, target_names=list(emotion_dict.values())))


Epoch 1/25


  self._warn_if_super_not_called()


[1m897/897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 34ms/step - accuracy: 0.2011 - loss: 1.9691 - val_accuracy: 0.2799 - val_loss: 1.7531 - learning_rate: 1.0000e-04
Epoch 2/25
[1m  1/897[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 12ms/step - accuracy: 0.0938 - loss: 1.9070



[1m897/897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.0938 - loss: 1.9070 - val_accuracy: 0.2795 - val_loss: 1.7530 - learning_rate: 1.0000e-04
Epoch 3/25
[1m897/897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 30ms/step - accuracy: 0.2726 - loss: 1.7598 - val_accuracy: 0.3157 - val_loss: 1.7035 - learning_rate: 1.0000e-04
Epoch 4/25
[1m897/897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.3125 - loss: 1.7271 - val_accuracy: 0.3157 - val_loss: 1.7036 - learning_rate: 1.0000e-04
Epoch 5/25
[1m897/897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 30ms/step - accuracy: 0.3000 - loss: 1.7235 - val_accuracy: 0.3253 - val_loss: 1.6784 - learning_rate: 1.0000e-04
Epoch 6/25
[1m897/897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.3438 - loss: 1.7531 - val_accuracy: 0.3256 - val_loss: 1.6784 - learning_rate: 1.0000e-04
Epoch 7/25
[1m897/897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [100]:
print("✅ Validation Accuracy:", round(score[1]*100,1), "%")

✅ Validation Accuracy: 37.0 %


___ 

## VGG16 Dataset - balanced

New dataset: *merged_disgust.csv*

Here we trained a deep learning model for facial emotion recognition using VGG16 with transfer learning. The model was fine-tuned on a merged dataset of facial expressions, including the disgust class from an extended source. After preprocessing and data augmentation, the network was trained for 25 epochs with early stopping and learning rate reduction strategies. 

✅ This model achieved **66% validation accuracy**.

In [106]:
# ===================================================
# IMPORTS
# ===================================================
import numpy as np
import pandas as pd
import cv2

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

# ===================================================
# LOAD DATASET
# ===================================================
df = pd.read_csv('/kaggle/input/merged-disgust-csv/merged_disgust.csv')
print("Dataset shape:", df.shape)
print(df.head())

emotion_dict = { 0: "Angry", 
                 1: "Disgust",
                 2: "Fear",
                 3: "Happy",
                 4: "Sad",
                 5: "Surprise",
                 6: "Neutral" }


# ===================================================
# IMAGE PROCESSING
# ===================================================
# Convert 'pixels' column into image arrays (48x48 grayscale → RGB)
img_array = df.pixels.apply(lambda x: np.fromstring(x, sep=' ').reshape(48, 48).astype('uint8'))
img_array = np.stack(img_array, axis=0)
print("Grayscale array shape:", img_array.shape)

# Convert to RGB
img_features = np.stack([cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) for img in img_array])
print("RGB image shape:", img_features.shape)

# Encode labels
le = LabelEncoder()
labels = le.fit_transform(df.emotion)
labels = to_categorical(labels)
print("Labels shape:", labels.shape)
print("Label mapping:", dict(zip(le.classes_, le.transform(le.classes_))))

# ===================================================
# TRAIN / VALIDATION SPLIT
# ===================================================
X_train, X_valid, y_train, y_valid = train_test_split(
    img_features,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42
)

# ===================================================
# PREPROCESS FOR VGG16
# ===================================================
X_train = preprocess_input(X_train.astype('float32'))
X_valid = preprocess_input(X_valid.astype('float32'))

num_classes = y_train.shape[1]

Dataset shape: (40429, 2)
   emotion                                             pixels
0        0  70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1        0  151 150 147 155 148 133 111 140 170 174 182 15...
2        2  231 212 156 164 174 138 161 173 182 200 106 38...
3        4  24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4        6  4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...
Grayscale array shape: (40429, 48, 48)
RGB image shape: (40429, 48, 48, 3)
Labels shape: (40429, 7)
Label mapping: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6}


In [107]:
# ===================================================
# BUILD MODEL WITH VGG16
# ===================================================

base_model = VGG16(weights='imagenet', include_top=False, input_shape=(48, 48, 3))
x = base_model.layers[-2].output
x = GlobalAveragePooling2D()(x)
output = Dense(num_classes, activation='softmax', name='out_layer')(x)
model = Model(inputs=base_model.input, outputs=output)

print(model.summary())

None


In [103]:
# ===================================================
# DATA AUGMENTATION
# ===================================================

early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=1e-5,
    patience=10,
    verbose=1,
    restore_best_weights=True )

lr_scheduler = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1 )

callbacks = [early_stopping, lr_scheduler]

train_datagen = ImageDataGenerator(
    preprocessing_function=None,  # already preprocessed
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True
)

train_datagen.fit(X_train)

# ===================================================
# COMPILE + TRAIN
# ===================================================
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy']
)

batch_size = 32
epochs = 25

history = model.fit(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_valid, y_valid),
    steps_per_epoch=len(X_train) // batch_size,
    epochs=epochs,
    callbacks=callbacks
)

# ===================================================
# EVALUATION
# ===================================================
val_loss, val_acc = model.evaluate(X_valid, y_valid, verbose=0)
print(f"\n\n ✅ Validation Accuracy: {val_acc * 100:.2f}%")

y_pred = model.predict(X_valid)
y_pred_class = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_valid, axis=1)

print("\nClassification Report:")
print(classification_report(y_true, y_pred_class, target_names=[
    emotion_dict[i] for i in range(num_classes)
]))


Epoch 1/25


  self._warn_if_super_not_called()


[1m1010/1010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 70ms/step - accuracy: 0.2854 - loss: 1.9082 - val_accuracy: 0.4891 - val_loss: 1.3340 - learning_rate: 1.0000e-04
Epoch 2/25
[1m   1/1010[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m47s[0m 47ms/step - accuracy: 0.4688 - loss: 1.2751



[1m1010/1010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.4688 - loss: 1.2751 - val_accuracy: 0.4743 - val_loss: 1.3671 - learning_rate: 1.0000e-04
Epoch 3/25
[1m1010/1010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 60ms/step - accuracy: 0.4916 - loss: 1.3563 - val_accuracy: 0.5339 - val_loss: 1.2565 - learning_rate: 1.0000e-04
Epoch 4/25
[1m1010/1010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.5000 - loss: 1.1616 - val_accuracy: 0.5365 - val_loss: 1.2503 - learning_rate: 1.0000e-04
Epoch 5/25
[1m1010/1010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 60ms/step - accuracy: 0.5502 - loss: 1.2143 - val_accuracy: 0.5723 - val_loss: 1.1565 - learning_rate: 1.0000e-04
Epoch 6/25
[1m1010/1010[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.5625 - loss: 1.1103 - val_accuracy: 0.5768 - val_loss: 1.1431 - learning_rate: 1.0000e-04
Epoch 7/25
[1m1010/1010[0m [32m━━━━━━━━━━━━━