In [None]:
import os
import cv2 as cv
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from sklearn.calibration import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score


In [None]:
# import tensorflow as tf
# from tensorflow.keras import models, layers

#### Loading dataset

In [2]:
DATASET_PATH = "../images/resized"

In [24]:
def load_dataset(dataset_path):
	image_paths = []
	for root, dirs, files in os.walk(dataset_path):
		for name in files:
			image_paths.append(os.path.join(root, name))

	data = []
	labels = []
	for image_path in image_paths:
		image = cv.imread(image_path)
		if image.shape != (60, 60, 3):
			print(image_path)
			continue
		# image = Image.open(image_path)
		label = image_path.split(os.path.sep)[-2]
		data.append(image)
		labels.append(label)

	return np.array(data), np.array(labels)

In [25]:
data, labels = load_dataset(DATASET_PATH)


#### Transforming dataset

In [10]:
data_reshaped = data.reshape(data.shape[0], data.shape[1] * data.shape[2] * data.shape[3])

le = LabelEncoder()
labels = le.fit_transform(labels)

In [33]:
X_train, X_test, y_train, y_test = train_test_split(data_reshaped, labels, test_size=0.25, random_state=42)

#### KNN Training

In [37]:
model_knn = KNeighborsClassifier(n_neighbors=5)
model_knn.fit(X_train, y_train)

In [44]:
print(classification_report(y_test, model_knn.predict(X_test), target_names=le.classes_))

              precision    recall  f1-score   support

       happy       0.73      0.68      0.70       187
         sad       0.53      0.59      0.56       114

    accuracy                           0.64       301
   macro avg       0.63      0.63      0.63       301
weighted avg       0.65      0.64      0.65       301



In [44]:
df_knn_report = pd.DataFrame([{
    "Test Accuracy": accuracy_score(y_test, model_knn.predict(X_test)),
    "Train Accuracy": accuracy_score(y_train, model_knn.predict(X_train))
}], index=["KNN"])

df_knn_report

Unnamed: 0,Test Accuracy,Train Accuracy
KNN,0.647841,0.753607


#### Support Vector Machine

In [45]:
model_svc = SVC()
model_svc.fit(X_train, y_train)

In [46]:
test_pred = model_svc.predict(X_test)
train_pred = model_svc.predict(X_train)

In [12]:
print(classification_report(y_test, test_pred, target_names=le.classes_))

              precision    recall  f1-score   support

       happy       0.66      0.90      0.76       187
         sad       0.59      0.23      0.33       114

    accuracy                           0.65       301
   macro avg       0.62      0.57      0.55       301
weighted avg       0.63      0.65      0.60       301



In [47]:
df_svc_report = pd.DataFrame([{
    "Test Accuracy": accuracy_score(y_test, test_pred),
    "Train Accuracy": accuracy_score(y_train, train_pred)
}], index=["SVC"])
df_svc_report

Unnamed: 0,Test Accuracy,Train Accuracy
SVC,0.651163,0.775805


#### CNN

In [48]:
le = LabelEncoder()
encoded_labels = le.fit_transform(labels)

X_train, X_test, y_train, y_test = train_test_split(data, encoded_labels, test_size=0.25, random_state=42)

In [49]:
model_cnn = models.Sequential([
    layers.Input((60, 60, 3)),
    layers.Conv2D(32, (3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation="relu"),

    layers.Flatten(),
    layers.Dense(64, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])

# model_cnn.summary()

2024-12-27 15:31:37.982436: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [50]:
model_cnn.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model_cnn.fit(X_train, y_train, epochs=10, 
                    validation_data=(X_test, y_test))


Epoch 1/10


  output, from_logits = _get_logits(


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 117ms/step - accuracy: 0.5774 - loss: 10.2201 - val_accuracy: 0.5980 - val_loss: 0.7525
Epoch 2/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 102ms/step - accuracy: 0.6108 - loss: 0.6909 - val_accuracy: 0.6312 - val_loss: 0.6819
Epoch 3/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 99ms/step - accuracy: 0.6909 - loss: 0.6185 - val_accuracy: 0.6246 - val_loss: 0.6999
Epoch 4/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 99ms/step - accuracy: 0.6709 - loss: 0.6207 - val_accuracy: 0.6179 - val_loss: 0.6983
Epoch 5/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 99ms/step - accuracy: 0.6836 - loss: 0.6143 - val_accuracy: 0.6146 - val_loss: 0.7072
Epoch 6/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 102ms/step - accuracy: 0.7098 - loss: 0.5707 - val_accuracy: 0.6213 - val_loss: 0.8212
Epoch 7/10
[1m29/29[0m [32m━━━━━━━━━━━

In [51]:
test_loss, test_acc = model_cnn.evaluate(X_test,  y_test, verbose=2)
train_loss, train_acc = model_cnn.evaluate(X_train,  y_train, verbose=2)

df_cnn_report = pd.DataFrame([{
    "Test Accuracy": test_acc,
    "Train Accuracy": train_acc
}], index=["CNN"])
df_cnn_report

10/10 - 0s - 25ms/step - accuracy: 0.5980 - loss: 0.8412
29/29 - 1s - 19ms/step - accuracy: 0.7847 - loss: 0.4387


Unnamed: 0,Test Accuracy,Train Accuracy
CNN,0.598007,0.784684


#### Comparison

In [52]:
pd.concat([df_knn_report, df_svc_report, df_cnn_report])

Unnamed: 0,Test Accuracy,Train Accuracy
KNN,0.647841,0.753607
SVC,0.651163,0.775805
CNN,0.598007,0.784684


#### Save models

In [53]:
import joblib

joblib.dump(model_knn, "../models/model_knn.pkl")
joblib.dump(model_svc, "../models/model_svc.pkl")
joblib.dump(model_cnn, "../models/model_cnn.pkl")

['../models/model_cnn.pkl']