In [20]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LambdaCallback, EarlyStopping, ReduceLROnPlateau
import pickle 

In [26]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')
valid = pd.read_csv('data/val.csv')

emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']


In [27]:
train.head()

Unnamed: 0,emotion,pixels
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,0,151 150 147 155 148 133 111 140 170 174 182 15...
2,2,231 212 156 164 174 138 161 173 182 200 106 38...
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


In [28]:
train.isnull().sum()

emotion    0
pixels     0
dtype: int64

In [29]:
# Convert the pixels to arrays and normalize
train['pixels'] = train['pixels'].apply(lambda x: np.array(x.split(), dtype='float32'))
test['pixels'] = test['pixels'].apply(lambda x: np.array(x.split(), dtype='float32'))
valid['pixels'] = valid['pixels'].apply(lambda x: np.array(x.split(), dtype='float32'))

In [30]:
# Normalize pixel values to be between 0 and 1
train['pixels'] = train['pixels'] / 255.0
test['pixels'] = test['pixels'] / 255.0
valid['pixels'] = test['pixels'] / 255.0

In [31]:
# Reshape the pixel data to 48x48x1
X_train = np.array(train['pixels'].tolist()).reshape(-1, 48, 48, 1)
X_test = np.array(test['pixels'].tolist()).reshape(-1, 48, 48, 1)
X_valid = np.array(test['pixels'].tolist()).reshape(-1, 48, 48, 1)

In [32]:
# Get the emotion labels and convert to categorical
y_train = to_categorical(train['emotion'], num_classes=7)
y_test = to_categorical(test['emotion'], num_classes=7)
y_valid = to_categorical(valid['emotion'], num_classes=7)

In [33]:
# model = Sequential()

# # Add convolutional layers
# model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48, 48, 1)))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# # Flatten the output
# model.add(Flatten())

# # Add dense layers
# model.add(Dense(128, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(7, activation='softmax'))

model = Sequential([
    Input(shape=(48, 48, 1)),
    #Add convolutional layers
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    #Flatten the output
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax')
])


In [34]:
model.summary()

In [37]:

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Tạo callback để hiển thị độ chính xác và hàm mất mát sau mỗi 10 epochs
def on_epoch_end(epoch, logs):
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}:")
        print(f" - loss: {logs['loss']:.4f} - accuracy: {logs['accuracy']:.4f}")
        print(f" - val_loss: {logs['val_loss']:.4f} - val_accuracy: {logs['val_accuracy']:.4f}")

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)


In [44]:
# Train the model
#model.fit(X_train, y_train, batch_size=64, epochs=30, validation_split=0.2)

#  
epochs_number = 30

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)


In [45]:

history = model.fit(
    X_train, y_train, batch_size=64,
    epochs=epochs_number, 
    validation_data=(X_valid, y_valid), 
    callbacks=[print_callback]
    )

# history = model.fit(
#     X_train, y_train, batch_size=64,
#     epochs=epochs_number, 
#     validation_data=(X_valid, y_valid), 
#     callbacks=[print_callback, early_stopping,reduce_lr]
#     )


Epoch 1/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 51ms/step - accuracy: 0.4045 - loss: 1.5370 - val_accuracy: 0.1797 - val_loss: 2.1126 - learning_rate: 1.0000e-04
Epoch 2/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 48ms/step - accuracy: 0.4172 - loss: 1.5160 - val_accuracy: 0.1808 - val_loss: 2.1442 - learning_rate: 1.0000e-04
Epoch 3/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 45ms/step - accuracy: 0.4222 - loss: 1.4984 - val_accuracy: 0.1808 - val_loss: 2.2274 - learning_rate: 1.0000e-04
Epoch 4/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 45ms/step - accuracy: 0.4370 - loss: 1.4792 - val_accuracy: 0.1789 - val_loss: 2.2029 - learning_rate: 1.0000e-04
Epoch 5/30
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 46ms/step - accuracy: 0.4479 - loss: 1.4552 - val_accuracy: 0.1767 - val_loss: 2.2113 - learning_rate: 1.0000e-04
Epoch 6/30
[1m449/449[0m [32m━━━━━━━━

In [None]:
#  Đánh giá mô hình
loss, accuracy = model.evaluate(X_test, y_test)
#print(f'Test accuracy of the model is -: {accuracy}')
print("Accuracy of the model is - " , accuracy*100 , "%")

In [None]:
epochs = [i for i in range(epochs_number)]
fig , ax = plt.subplots(1,2)
train_acc = history.history['accuracy']
train_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
fig.set_size_inches(10,6)

ax[0].plot(epochs , train_acc , 'go-' , label = 'Training Accuracy')
ax[0].plot(epochs , val_acc , 'ro-' , label = 'Testing Accuracy')
ax[0].set_title('Training & Validation Accuracy')
ax[0].legend()
ax[0].set_xlabel("Epochs")
ax[0].set_ylabel("Accuracy")
#ax[0].yticks(np.arange(0, 1.1, 0.1))  # Chia mốc y-axis theo bước 0.1
#ax[0].xticks(np.arange(0, epochs_number, 10))    # Chia mốc x-axis theo bước 10

ax[1].plot(epochs , train_loss , 'g-o' , label = 'Training Loss')
ax[1].plot(epochs , val_loss , 'r-o' , label = 'Testing Loss')
ax[1].set_title('Testing Accuracy & Loss')
ax[1].legend()
ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("Loss")
#ax[1].xticks(np.arange(0, epochs_number, 10))    # Chia mốc x-axis theo bước 10
plt.show()

In [None]:
# Hiển thị confusion matrix
#predictions = model.predict(X_test)
#predicted_classes = np.argmax(predictions, axis=1)


y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print(y_pred_classes[:10])

In [None]:
conf_matrix = confusion_matrix(y_test, y_pred_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', 
            xticklabels=emotion_labels, yticklabels=emotion_labels) 
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

In [13]:
# Output the predictions (for demonstration purposes, we'll just print the first 10 predictions)
#print(y_pred_classes[:10])

[2 0 0 3 3 3 4 4 4 2]


In [15]:
submission_df = pd.DataFrame({
    'emotion': y_pred_classes
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('submission.csv', index=False)

In [16]:
model.save('facial Expression.h5')

