In [1]:
import pandas as pd


We have created a df, that stores all the image paths and is sorted by label and split.
We first want to load that.

In [3]:
train_df = pd.read_csv('data/train/fer2013_train.csv')
test_df  = pd.read_csv('data/test/fer2013_test.csv')

train_df
test_df

Unnamed: 0,filepath,label,split
0,FER-2013\test\angry\PrivateTest_10131363.jpg,angry,test
1,FER-2013\test\angry\PrivateTest_10304478.jpg,angry,test
2,FER-2013\test\angry\PrivateTest_1054527.jpg,angry,test
3,FER-2013\test\angry\PrivateTest_10590091.jpg,angry,test
4,FER-2013\test\angry\PrivateTest_1109992.jpg,angry,test
...,...,...,...
7173,FER-2013\test\surprise\PublicTest_98089595.jpg,surprise,test
7174,FER-2013\test\surprise\PublicTest_98567249.jpg,surprise,test
7175,FER-2013\test\surprise\PublicTest_98972870.jpg,surprise,test
7176,FER-2013\test\surprise\PublicTest_99242645.jpg,surprise,test


It is good practice to add a third validation split when working with machine learning libraries. So that in the end when testing the model you get a totally unbiased test accuarcy. The validation split is used to finetune hyperparameters. It is ussually computed after each epoch. So first we will split our test data into test and validation set

In [4]:
from sklearn.model_selection import train_test_split

# stratify to keep the same class balance
train_paths, val_paths, train_labels, val_labels = train_test_split(
    train_df['filepath'], train_df['label'],
    test_size=0.1,  # e.g. 10% of training for validation
    stratify=train_df['label'],
    random_state=42
)
print(train_labels)
print(val_labels)

24085      sad
7006      fear
20924      sad
7514      fear
11       angry
         ...  
8174      fear
3952     angry
11548    happy
13290    happy
23832      sad
Name: label, Length: 25838, dtype: object
26775    surprise
3507        angry
26872    surprise
13601       happy
15976     neutral
           ...   
14927       happy
17773     neutral
160         angry
24621         sad
4761         fear
Name: label, Length: 2871, dtype: object


Next we want to encode our labels numerically. We will use the integers 0, ..., 6 and in the end use the softmax activation function to let the modell decide which class is the most likely. COntrary two the two options we learned in the course being one against one, or one against all. 

In [5]:
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf

# labels nummern hinzufügen, also welche ziffer welchem label entspricht
le = LabelEncoder().fit(train_labels)
train_y_int = le.transform(train_labels)           # ints 0…6
val_y_int   = le.transform(val_labels) 


We will define a function that can read the iamges and format them into a 48*48*1 matrix with the last dimension being used for the label and 48*48 representing the picture in grayscale format. If we would have a color picture we could format it into 48*48*3*1 the third dimension being used for encoding blue green and red pixel values. It is also custom and good practice to normalite the pixel values to [0,1]

In [6]:
import cv2
import numpy as np

def load_and_preprocess(path, target_size=(48,48)):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)      # FER-2013 is grayscale
    img = cv2.resize(img, target_size)
    img = img.astype('float32') / 255.0               # normalize to [0,1]
    return np.expand_dims(img, axis=-1)               # shape (48,48,1)


Next we want to prepare the dataset on which our training happens. tensorflow uses enables you to use so called generator function to allow us to create the nmerical matrices we talked about before

In [7]:
import tensorflow as tf

def gen(paths, labels):
    for p, l in zip(paths, labels):
        img = load_and_preprocess(p)
        yield img, l

train_ds = tf.data.Dataset.from_generator(
    lambda: gen(train_paths, train_y_int),
    output_types=(tf.float32, tf.int32),
    output_shapes=([48,48,1], [])
)
train_ds = (train_ds
            .shuffle(1000)
            .batch(64)
            .prefetch(tf.data.experimental.AUTOTUNE))

val_ds = (tf.data.Dataset.from_generator(
            lambda: gen(val_paths,   val_y_int),
            output_types=(tf.float32, tf.int32),
            output_shapes=([48,48,1], [])
          )
          .batch(64)
          .prefetch(tf.data.experimental.AUTOTUNE))
#generator functions erklären ein bisschen auf tf.dataset eingehen

for images, labels in train_ds.take(1):
    print("Image batch shape:", images.shape)   # e.g. (64,48,48,1)
    print("Label batch shape:", labels.shape)   # e.g. (64,)
    print("Label values:", np.unique(labels.numpy()))

Instructions for updating:
Use output_signature instead
Instructions for updating:
Use output_signature instead
Image batch shape: (64, 48, 48, 1)
Label batch shape: (64,)
Label values: [0 1 2 3 4 5 6]


Lastly we want to build our model. First we will just use a basic CNN model with only one convolutional layer and a pooling layer. The convolutional layer is mostly used to extract features from the pictures. We want to see if these features are already linearly seperable and if the model can achieve good performance without an additional fully connected layer. We will also build a normal cnn model that has an addiotional fully connected layer after the convolutional layer and test the perfomances.

In [8]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Number of emotion categories
num_classes = 7

# Build the model
basic_model = models.Sequential([
    # Input is 48×48 grayscale
    layers.Input(shape=(48, 48, 1)),

    # === The one convolutional layer ===
    layers.Conv2D(
        filters=32,            # number of feature maps
        kernel_size=(3, 3),    # 3×3 receptive field
        activation='relu',
        padding='same'
    ),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Flatten and feed into a small MLP head
    layers.Flatten(),
    layers.Dense(num_classes, activation='softmax')
])

# Compile with a standard optimizer + loss for multiclass classification
basic_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Inspect the model
basic_model.summary()

# Train it
history_basic = basic_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    verbose=2
)


Epoch 1/100




404/404 - 459s - 1s/step - accuracy: 0.3517 - loss: 1.6530 - val_accuracy: 0.4194 - val_loss: 1.5546
Epoch 2/100
404/404 - 33s - 82ms/step - accuracy: 0.4354 - loss: 1.4900 - val_accuracy: 0.4155 - val_loss: 1.5121
Epoch 3/100
404/404 - 35s - 88ms/step - accuracy: 0.4701 - loss: 1.4136 - val_accuracy: 0.4521 - val_loss: 1.4669
Epoch 4/100
404/404 - 32s - 78ms/step - accuracy: 0.4913 - loss: 1.3568 - val_accuracy: 0.4455 - val_loss: 1.4760
Epoch 5/100
404/404 - 35s - 88ms/step - accuracy: 0.5130 - loss: 1.3071 - val_accuracy: 0.4573 - val_loss: 1.4615
Epoch 6/100
404/404 - 34s - 85ms/step - accuracy: 0.5309 - loss: 1.2651 - val_accuracy: 0.4615 - val_loss: 1.4468
Epoch 7/100
404/404 - 32s - 79ms/step - accuracy: 0.5475 - loss: 1.2264 - val_accuracy: 0.4653 - val_loss: 1.4447
Epoch 8/100
404/404 - 36s - 89ms/step - accuracy: 0.5626 - loss: 1.1954 - val_accuracy: 0.4525 - val_loss: 1.4937
Epoch 9/100
404/404 - 33s - 81ms/step - accuracy: 0.5740 - loss: 1.1626 - val_accuracy: 0.4657 - val_

In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Number of emotion categories
num_classes = 7

# Build the model
model = models.Sequential([
    # Input is 48×48 grayscale
    layers.Input(shape=(48, 48, 1)),

    # === The one convolutional layer ===
    layers.Conv2D(
        filters=32,            # number of feature maps
        kernel_size=(3, 3),    # 3×3 receptive field
        activation='relu',
        padding='same'
    ),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Flatten and feed into a small MLP head
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

# Compile with a standard optimizer + loss for multiclass classification
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Inspect the model
model.summary()

# Train it
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    verbose=2
)


Epoch 1/100
404/404 - 49s - 122ms/step - accuracy: 0.3520 - loss: 1.6551 - val_accuracy: 0.4117 - val_loss: 1.5370
Epoch 2/100
404/404 - 48s - 118ms/step - accuracy: 0.4486 - loss: 1.4526 - val_accuracy: 0.4441 - val_loss: 1.4612
Epoch 3/100
404/404 - 45s - 111ms/step - accuracy: 0.4950 - loss: 1.3389 - val_accuracy: 0.4556 - val_loss: 1.4471
Epoch 4/100
404/404 - 44s - 110ms/step - accuracy: 0.5270 - loss: 1.2527 - val_accuracy: 0.4643 - val_loss: 1.4213
Epoch 5/100
404/404 - 46s - 115ms/step - accuracy: 0.5622 - loss: 1.1735 - val_accuracy: 0.4653 - val_loss: 1.4267
Epoch 6/100
404/404 - 47s - 115ms/step - accuracy: 0.5945 - loss: 1.0914 - val_accuracy: 0.4674 - val_loss: 1.4792
Epoch 7/100
404/404 - 47s - 117ms/step - accuracy: 0.6319 - loss: 1.0041 - val_accuracy: 0.4608 - val_loss: 1.4919
Epoch 8/100
404/404 - 46s - 113ms/step - accuracy: 0.6645 - loss: 0.9258 - val_accuracy: 0.4706 - val_loss: 1.5109
Epoch 9/100
404/404 - 45s - 112ms/step - accuracy: 0.7009 - loss: 0.8355 - val_a