In [1]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import (Input, Conv2D, BatchNormalization, Activation,
                                     Dense, MaxPooling2D, Dropout, Flatten, 
                                     GlobalAveragePooling2D, Add)
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import keras_tuner as kt
import pandas as pd

In [2]:
# Emotion labels, listed alphabetically. flow_from_directory assigns class
# indices in alphanumeric order of the sub-directory names when `classes` is
# not given, so position i here is presumably class index i -- verify against
# `train_generator.class_indices`.
EMOTIONS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

# Central experiment configuration shared by every cell below.
CONFIG = {
    # Data locations (relative to the notebook's working directory).
    'train_dir': 'FER/train',
    'test_dir': 'FER/test',
    # Image geometry: 48x48 single-channel (grayscale) inputs.
    'img_size': (48, 48),
    'input_shape': (48, 48, 1),
    # Derived from the label list so the two can never drift apart.
    'num_classes': len(EMOTIONS),
    # Training loop settings.
    'batch_size': 32,
    'epochs': 50,
    'validation_split': 0.2,
    # Fixed (non-tuned) architecture settings.
    'kernel_size': (3, 3),
    'pool_size': (2, 2),
    'use_batch_norm': True,
    'dense_units': 128,
    # Reproducibility / logging.
    'random_seed': 42,
    'verbose': 1
}

# Discrete search space sampled by the model builders through `hp.Choice`.
hyperparameters = {
    'filters_base': [32, 48, 64],
    'dropout_conv': [0.3, 0.4, 0.5],
    'dropout_dense': [0.3, 0.5],
    'learning_rate': [0.001, 0.0005, 0.0001]
}

In [3]:
# Seed Python, NumPy and the backend once, before any stochastic component is
# created. CONFIG['random_seed'] was previously only handed to the generators
# and the tuner, leaving weight initialisation and dropout unseeded.
keras.utils.set_random_seed(CONFIG['random_seed'])

# Training pipeline: rescale to [0, 1] plus light geometric augmentation.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.1,
    fill_mode='nearest',
    validation_split=CONFIG['validation_split']
)

# Validation pipeline: rescale ONLY. Drawing the validation subset from
# `train_datagen` would randomly rotate/shift/flip the held-out images, making
# val_accuracy (the tuner objective) noisy and not comparable to test results.
# The same `validation_split` fraction is used so that the 'training' and
# 'validation' subsets stay complementary (the subset is selected from the
# directory listing, not from the augmentation settings).
val_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=CONFIG['validation_split']
)

# Test pipeline: rescale only, no split.
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Arguments common to all three directory iterators.
_flow_kwargs = dict(
    target_size=CONFIG['img_size'],
    color_mode='grayscale',
    batch_size=CONFIG['batch_size'],
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(
    CONFIG['train_dir'],
    shuffle=True,
    subset='training',
    seed=CONFIG['random_seed'],
    **_flow_kwargs
)

# shuffle=False keeps batch order aligned with `val_generator.classes`.
val_generator = val_datagen.flow_from_directory(
    CONFIG['train_dir'],
    shuffle=False,
    subset='validation',
    seed=CONFIG['random_seed'],
    **_flow_kwargs
)

# shuffle=False keeps batch order aligned with `test_generator.classes`.
test_generator = test_datagen.flow_from_directory(
    CONFIG['test_dir'],
    shuffle=False,
    **_flow_kwargs
)

Found 22943 images belonging to 7 classes.
Found 5730 images belonging to 7 classes.
Found 7169 images belonging to 7 classes.


In [4]:
# Balanced class weights computed from the integer labels of the training
# subset, to counter class imbalance during fitting.
_train_labels = train_generator.classes
_present_classes = np.unique(_train_labels)

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=_present_classes,
    y=_train_labels
)

# Key each weight by the class label it was computed for rather than by its
# position in the array: the two only coincide when every index 0..K-1 occurs
# in the training subset. Plain int/float keep the dict free of NumPy scalars.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(_present_classes, class_weights)
}

**2D CNN**


In [5]:
def build_cnn_tuner(hp):
    """Build and compile the baseline 2D CNN for a single tuner trial.

    The network is three convolutional stages (2, 2 and 1 conv layers with
    ``filters_base``, ``2x`` and ``4x`` filters), each closed by max-pooling
    and dropout, followed by a dense head and a softmax output.

    Args:
        hp: Object exposing ``Choice(name, values=...)``; the tuner passes its
            hyperparameter container here.

    Returns:
        A compiled ``Sequential`` model named ``'Baseline_CNN'`` that tracks
        accuracy, precision and recall.
    """
    # Sample the searchable hyperparameters (registration order is kept).
    base_width = hp.Choice('filters_base', values=hyperparameters['filters_base'])
    conv_drop = hp.Choice('dropout_conv', values=hyperparameters['dropout_conv'])
    head_drop = hp.Choice('dropout_dense', values=hyperparameters['dropout_dense'])
    lr = hp.Choice('learning_rate', values=hyperparameters['learning_rate'])

    with_bn = CONFIG['use_batch_norm']

    net = Sequential(name='Baseline_CNN')
    net.add(Input(shape=CONFIG['input_shape'], name='input'))

    # (filters, [(conv layer name, batch-norm layer name), ...]) per stage.
    stage_plan = [
        (base_width, [('conv1_1', 'bn1_1'), ('conv1_2', 'bn1_2')]),
        (base_width * 2, [('conv2_1', 'bn2_1'), ('conv2_2', 'bn2_2')]),
        (base_width * 4, [('conv3', 'bn3')]),
    ]

    for stage_no, (width, units) in enumerate(stage_plan, start=1):
        for conv_name, bn_name in units:
            net.add(Conv2D(width, CONFIG['kernel_size'], activation='relu',
                           padding='same', name=conv_name))
            if with_bn:
                net.add(BatchNormalization(name=bn_name))
        net.add(MaxPooling2D(pool_size=CONFIG['pool_size'], name=f'pool{stage_no}'))
        net.add(Dropout(conv_drop, name=f'dropout{stage_no}'))

    # Classification head.
    net.add(Flatten(name='flatten'))
    net.add(Dense(CONFIG['dense_units'], activation='relu', name='dense1'))
    if with_bn:
        net.add(BatchNormalization(name='bn_dense'))
    net.add(Dropout(head_drop, name='dropout_dense'))
    net.add(Dense(CONFIG['num_classes'], activation='softmax', name='output'))

    adam = keras.optimizers.Adam(learning_rate=lr)
    tracked_metrics = [
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ]
    net.compile(
        optimizer=adam,
        loss='categorical_crossentropy',
        metrics=tracked_metrics,
    )

    return net

**ResNet18**


In [6]:
def build_resnet18_tuner(hp):
    """Build and compile a ResNet-18-style classifier for a single tuner trial.

    Layout: a stride-1 3x3 stem, then four stages of two residual blocks each
    with ``filters_base`` x (1, 2, 4, 8) filters. Stages 2-4 downsample with
    stride 2 in their first block. Each stage is followed by dropout, and the
    head is global average pooling -> dropout -> softmax.

    Args:
        hp: Object exposing ``Choice(name, values=...)``; the tuner passes its
            hyperparameter container here.

    Returns:
        A compiled functional ``Model`` named ``'ResNet18'`` that tracks
        accuracy, precision and recall.
    """
    filters_base = hp.Choice('filters_base', values=hyperparameters['filters_base'])
    dropout_conv = hp.Choice('dropout_conv', values=hyperparameters['dropout_conv'])
    dropout_dense = hp.Choice('dropout_dense', values=hyperparameters['dropout_dense'])
    learning_rate = hp.Choice('learning_rate', values=hyperparameters['learning_rate'])

    use_bn = CONFIG['use_batch_norm']

    def residual_block(x, filters, stride=1, name=''):
        """Two 3x3 convs plus a skip connection; returns the activated sum."""
        shortcut = x

        x = Conv2D(filters, CONFIG['kernel_size'], strides=stride, padding='same',
                   name=f'{name}_conv1')(x)
        if use_bn:
            x = BatchNormalization(name=f'{name}_bn1')(x)
        x = Activation('relu', name=f'{name}_relu1')(x)

        x = Conv2D(filters, CONFIG['kernel_size'], strides=1, padding='same',
                   name=f'{name}_conv2')(x)
        if use_bn:
            x = BatchNormalization(name=f'{name}_bn2')(x)

        # Project the shortcut with a 1x1 conv whenever the main path changed
        # the spatial size (stride) or the channel count, so Add sees equal shapes.
        if stride != 1 or shortcut.shape[-1] != filters:
            shortcut = Conv2D(filters, (1, 1), strides=stride, padding='same',
                              name=f'{name}_shortcut_conv')(shortcut)
            if use_bn:
                shortcut = BatchNormalization(name=f'{name}_shortcut_bn')(shortcut)

        x = Add(name=f'{name}_add')([x, shortcut])
        x = Activation('relu', name=f'{name}_relu2')(x)

        return x

    inputs = Input(shape=CONFIG['input_shape'], name='input')

    # Stem
    x = Conv2D(filters_base, CONFIG['kernel_size'], strides=1,
               padding='same', name='conv1')(inputs)
    if use_bn:
        x = BatchNormalization(name='bn1')(x)
    x = Activation('relu', name='relu1')(x)

    # (filter multiplier, stride of the first block) for stages 1..4.
    stage_plan = [(1, 1), (2, 2), (4, 2), (8, 2)]
    for stage_no, (multiplier, first_stride) in enumerate(stage_plan, start=1):
        stage_filters = filters_base * multiplier
        x = residual_block(x, stage_filters, stride=first_stride,
                           name=f'stage{stage_no}_block1')
        x = residual_block(x, stage_filters, stride=1,
                           name=f'stage{stage_no}_block2')
        if dropout_conv > 0:
            x = Dropout(dropout_conv, name=f'dropout_stage{stage_no}')(x)

    x = GlobalAveragePooling2D(name='global_avg_pool')(x)
    # `dropout_dense` used to be sampled but never applied, so the tuner spent
    # trials on a dimension with no effect. Apply it ahead of the classifier.
    if dropout_dense > 0:
        x = Dropout(dropout_dense, name='dropout_dense')(x)
    outputs = Dense(CONFIG['num_classes'], activation='softmax', name='output')(x)

    model = Model(inputs=inputs, outputs=outputs, name='ResNet18')

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            keras.metrics.Precision(name='precision'),
            keras.metrics.Recall(name='recall')
        ]
    )

    return model

**Tuner**


In [10]:
def tune(model_builder, model_name, search_strategy='random', max_trials=10):
    """Run a hyperparameter search, then report, evaluate and save the winner.

    Args:
        model_builder: Callable taking a hyperparameter object and returning a
            compiled model (e.g. ``build_cnn_tuner``).
        model_name: Short label used in the tuning directory, the printed
            report and the saved file name.
        search_strategy: ``'random'``, ``'bayesian'`` or ``'hyperband'``.
        max_trials: Trial budget for random/bayesian search. It is not passed
            to Hyperband, which is configured with ``max_epochs`` instead.

    Returns:
        Tuple ``(tuner, best_hps, best_model)``.

    Raises:
        ValueError: If ``search_strategy`` is not one of the supported names.

    Side effects:
        Creates the tuner with ``overwrite=True`` under ``tuning_<model_name>``
        and saves the best model to ``best_<model_name>_<strategy>.keras``.
    """
    if search_strategy not in ('random', 'bayesian', 'hyperband'):
        raise ValueError("search_strategy must be 'random', 'bayesian', or 'hyperband'")

    # Keyword arguments every tuner flavour receives.
    shared_kwargs = dict(
        objective=kt.Objective('val_accuracy', direction='max'),
        directory=f'tuning_{model_name}',
        project_name=f'{search_strategy}_search',
        overwrite=True,
        seed=CONFIG['random_seed'],
    )

    if search_strategy == 'hyperband':
        tuner = kt.Hyperband(
            model_builder,
            max_epochs=CONFIG['epochs'],
            factor=3,
            **shared_kwargs
        )
    else:
        if search_strategy == 'random':
            tuner_cls = kt.RandomSearch
        else:
            tuner_cls = kt.BayesianOptimization
        tuner = tuner_cls(
            model_builder,
            max_trials=max_trials,
            executions_per_trial=1,
            **shared_kwargs
        )

    # Stop stalled trials early and halve the learning rate on val_loss plateaus.
    stop_early = EarlyStopping(
        monitor='val_accuracy',
        patience=10,
        restore_best_weights=True,
        verbose=0
    )
    shrink_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=0
    )

    print("\nStarting hyperparameter search...")
    tuner.search(
        train_generator,
        validation_data=val_generator,
        epochs=CONFIG['epochs'],
        callbacks=[stop_early, shrink_lr],
        class_weight=class_weight_dict,
        verbose=1
    )

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

    # Report the winning configuration.
    rule = '=' * 70
    print(f"\n{rule}")
    print(f"BEST HYPERPARAMETERS FOR {model_name.upper()}")
    print(rule)
    for hp_name in ('filters_base', 'dropout_conv', 'dropout_dense', 'learning_rate'):
        print(f"  {hp_name}: {best_hps.get(hp_name)}")

    best_model = tuner.get_best_models(num_models=1)[0]

    # evaluate() yields loss followed by the compiled metrics, in that order.
    print("\nEvaluating best model on validation set...")
    scores = best_model.evaluate(val_generator, verbose=0)
    val_loss, val_acc, val_precision, val_recall = scores

    print("\nBest Model Performance:")
    print(f"  Val Accuracy: {val_acc:.4f}")
    print(f"  Val Loss: {val_loss:.4f}")
    print(f"  Val Precision: {val_precision:.4f}")
    print(f"  Val Recall: {val_recall:.4f}")

    saved_path = f'best_{model_name}_{search_strategy}.keras'
    best_model.save(saved_path)
    print(f"\nBest model saved as '{saved_path}'")

    tuner.results_summary()

    return tuner, best_hps, best_model

**Tuned CNN**


In [None]:
# Random search over the baseline CNN (10 trials); keeps the tuner, the best
# hyperparameters and the best model for later cells.
cnn_search = tune(
    model_builder=build_cnn_tuner,
    model_name='cnn',
    search_strategy='random',
    max_trials=10,
)
cnn_tuner, cnn_best_hps, cnn_best_model = cnn_search

**Tuned ResNet18**


In [None]:
# Random search over the ResNet18 builder (10 trials); keeps the tuner, the
# best hyperparameters and the best model for later cells.
resnet_search = tune(
    model_builder=build_resnet18_tuner,
    model_name='resnet18',
    search_strategy='random',
    max_trials=10,
)
resnet_tuner, resnet_best_hps, resnet_best_model = resnet_search