In [None]:
import numpy as np  # Linear algebra
import pandas as pd  # Data processing, CSV file I/O (e.g., pd.read_csv)
import matplotlib.pyplot as plt  # Visualization

import seaborn as sns  # Statistical plots (class-count bar charts)

from sklearn.model_selection import GroupShuffleSplit, train_test_split  # Data splitting
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder  # Data preprocessing
from sklearn.metrics import classification_report  # Evaluation metrics

import tensorflow as tf
from tensorflow.keras import optimizers, datasets, models, layers  # TensorFlow and Keras imports
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau  # Training callbacks


In [None]:
# Class names, positioned so that the integer-like code stored in the CSV's
# `emotion` column can be used directly as an index into this list.
emotionsName = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Load the dataset and reshape it in a single chain:
#   1. move the numeric label aside and strip the leading space from " pixels",
#   2. derive a readable `emotion` column from the numeric code,
#   3. drop the helper column and the unused " Usage" column.
# The resulting frame has exactly two columns: `pixels` and `emotion`.
csvData = (
    pd.read_csv('dataset/icml_face_data.csv/icml_face_data.csv')
    .rename(columns={"emotion": "EmotionName", " pixels": "pixels"})
    .assign(emotion=lambda frame: frame["EmotionName"].map(lambda code: emotionsName[int(code)]))
    .drop(columns=[' Usage', "EmotionName"])
)
csvData.head()

In [None]:

# Preview a single sample: decode its space-separated pixel string into a
# 48x48 array, draw it, and print the label stored alongside it.
sample_idx = 13  # row label of the sample to preview (named so the builtin `id` is not shadowed)
img = np.reshape(np.array(csvData.loc[sample_idx, "pixels"].split()).astype(int), (48, 48))

# The array is 2-D (one channel), so request a grayscale colormap explicitly;
# matplotlib's default colormap would render it in false colour.
plt.imshow(img, cmap="gray")
print(csvData.loc[sample_idx, "emotion"])

In [None]:
# Bar chart of how many rows each emotion class has in the loaded dataset.
fig, ax = plt.subplots()
sns.countplot(x=csvData["emotion"], ax=ax)
ax.set_title('Emotions count', fontsize=16)
ax.set_ylabel('count', fontsize=12)
ax.set_xlabel('emotions', fontsize=12)
# Remove only the top and right spines; the left and bottom axes stay visible.
sns.despine(ax=ax, top=True, right=True, left=False, bottom=False)
plt.show()

In [None]:
# Average number of rows per emotion class.
# Counting the label column directly measures rows per class, independent of
# column order and of missing values in any other column.
mean = csvData["emotion"].value_counts().mean()
print("Mean emotion count is: " + str(np.floor(mean)))

In [None]:
# Balance the dataset: resample every emotion class to the same number of rows
# (the integer part of the mean class size), then shuffle the result.
BALANCE_SEED = 0  # fixed seed so the balanced dataset is identical on every run
target_size = int(mean)

balanced_parts = []
for emotion in emotionsName:
    class_rows = csvData[csvData.emotion == emotion]
    if class_rows.empty:
        # Nothing to sample from; skipping also avoids dividing by zero below.
        continue
    if len(class_rows) < target_size:
        # Under-represented class: tile its rows just enough times that a
        # sample of `target_size` rows can be drawn without replacement.
        repeats = target_size // len(class_rows) + 1
        class_rows = pd.concat([class_rows] * repeats)
    balanced_parts.append(class_rows.sample(target_size, random_state=BALANCE_SEED))

# NOTE(review): oversampled classes now contain exact duplicate rows. Any later
# train/test split must keep duplicates on the same side (or the split must be
# done before balancing), otherwise evaluation data leaks into training.
correctData = (
    pd.concat(balanced_parts)
    .sample(frac=1, random_state=BALANCE_SEED)  # shuffle so classes are interleaved
    .reset_index(drop=True)
)

plt.title('Emotions count', size=16)
sns.countplot(x=correctData["emotion"])
plt.ylabel('count', size=12)
plt.xlabel('emotions', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras import models, layers
from tensorflow.keras.utils import to_categorical  # for one-hot encoding

emotionsName = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Decode every space-separated pixel string into a 48x48 image and stack the
# images into one array; float32 halves the memory of the default int64.
X = np.stack([
    np.array(pixels.split()).astype(np.float32).reshape(48, 48)
    for pixels in correctData["pixels"]
])
# Add channel dimension for CNN compatibility -> (samples, 48, 48, 1)
X = np.expand_dims(X, axis=3)

# One-hot encode labels using each name's position in `emotionsName`, so that
# column i of Y (and output unit i of the model) always means emotionsName[i].
# (LabelEncoder would sort the names alphabetically and break that mapping.)
# `num_classes` pins the width to the output layer even if a class is absent.
class_ids = [emotionsName.index(label) for label in correctData["emotion"]]
Y = to_categorical(class_ids, num_classes=len(emotionsName))

# Train-test split.
# The balancing step tiles under-represented classes, so `correctData` holds
# exact duplicate images. Rows sharing the same pixel string form one group and
# the split is done per group, so no image appears in both train and test.
# test_size therefore applies to groups; the row-level share is approximate.
duplicate_groups = pd.factorize(correctData["pixels"])[0]
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
train_idx, test_idx = next(splitter.split(X, Y, groups=duplicate_groups))
x_train, x_test = X[train_idx], X[test_idx]
y_train, y_test = Y[train_idx], Y[test_idx]

# Define the model. Declaring the input up front builds the network
# immediately, so it can be inspected before training.
model = models.Sequential([
    layers.Input(shape=(48, 48, 1)),
    # Scale intensities inside the model so callers keep passing raw pixels.
    # Assumes 8-bit grayscale values in [0, 255] -- TODO confirm for this dataset.
    layers.Rescaling(1.0 / 255),

    # First convolutional layer
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Second convolutional layer
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Flatten the data before feeding to fully-connected layers
    layers.Flatten(),

    # First fully-connected layer
    layers.Dense(128, activation='relu'),

    # Dropout layer to prevent overfitting
    layers.Dropout(0.2),

    # Output layer with softmax activation for probability distribution
    layers.Dense(len(emotionsName), activation='softmax'),
])

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model (replace epochs with a suitable value).
# NOTE(review): the held-out split doubles as the validation set here, so the
# reported validation metrics are also the only test metrics available.
history = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))


In [None]:
# Training vs. held-out loss per epoch, taken from the Keras History object.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()