### Anggota Kelompok :


*   2372020 - Denzel Xavier Sugiarta
*   2372067 - Angel Gabriella Yosephine Sibarani
*   2372068 - Indri Mahalani Simamora



In [None]:
import os
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
import joblib

# Preprocessing
IMG_SIZE = (64, 64)
expressions = ["neutral", "happy", "sad", "angry"]
dataset_dir = "dataset/faces-png"

def augment_image(img):
    """Augmentasi sederhana: flip dan rotasi"""
    augmented = [img]
    flipped = cv2.flip(img, 1)
    rotated = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    augmented.extend([flipped, rotated])
    return augmented

X, y = [], []

print("[INFO] Loading dataset with augmentation (pose=straight only)...")

for user_id in os.listdir(dataset_dir):
    user_path = os.path.join(dataset_dir, user_id)
    if not os.path.isdir(user_path):
        continue

    for filename in os.listdir(user_path):
        if not filename.endswith(".png"):
            continue

        parts = filename.split("_")
        if len(parts) < 4:
            continue

        # pose = parts[1].lower()
        expression = parts[2].lower()

        if expression not in expressions:
            continue

        img_path = os.path.join(user_path, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue

        img = cv2.resize(img, IMG_SIZE)

        augmented_imgs = augment_image(img)
        for aug_img in augmented_imgs:
            X.append(aug_img.flatten())
            y.append(expression)

print(f"[INFO] Total loaded after augmentation: {len(X)} images")

if len(X) == 0:
    raise ValueError("Tidak ada data pose 'straight' yang ditemukan. Cek kembali struktur filename.")

X = np.array(X)
y = np.array(y)

# Validation Set
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# PCA
print("[INFO] Applying PCA...")
pca = PCA(n_components=100)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

print("[INFO] Hyperparameter tuning...")
param_grid = {
    'n_estimators': [100, 150],
    'max_depth': [10, 20],
    'min_samples_split': [2],
    'min_samples_leaf': [1],
}

# Model
rf = RandomForestClassifier(random_state=42)
grid = GridSearchCV(rf, param_grid, cv=5, n_jobs=-1, verbose=1)
grid.fit(X_train_pca, y_train)

print("\n[INFO] Best Parameters:", grid.best_params_)

# Evaluation
y_pred = grid.predict(X_test_pca)
print("\n[INFO] Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print("[INFO] Accuracy:", accuracy_score(y_test, y_pred))
# Simpan model terbaik
joblib.dump(grid.best_estimator_, "random_forest_model.pkl")
# Simpan PCA
joblib.dump(pca, "pca_transform.pkl")
# Simpan LabelEncoder
joblib.dump(label_encoder, "label_encoder.pkl")
print("[INFO] Model, PCA, dan LabelEncoder berhasil disimpan.")

[INFO] Loading dataset with augmentation (pose=straight only)...
[INFO] Total loaded after augmentation: 5616 images
[INFO] Applying PCA...
[INFO] Hyperparameter tuning...
Fitting 5 folds for each of 4 candidates, totalling 20 fits

[INFO] Best Parameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 150}

[INFO] Classification Report:
              precision    recall  f1-score   support

       angry       0.78      0.82      0.80       279
       happy       0.77      0.77      0.77       279
     neutral       0.81      0.80      0.81       285
         sad       0.82      0.79      0.80       281

    accuracy                           0.80      1124
   macro avg       0.80      0.80      0.80      1124
weighted avg       0.80      0.80      0.80      1124

[INFO] Accuracy: 0.7953736654804271
[INFO] Model, PCA, dan LabelEncoder berhasil disimpan.


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

# Preprocessing
IMG_SIZE = (64, 64)
expressions = ["neutral", "happy", "sad", "angry"]
dataset_dir = "dataset/faces-png"

def augment_image(img):
    augmented = [img]
    flipped = cv2.flip(img, 1)
    rotated = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    augmented.extend([flipped, rotated])
    return augmented

X, y = [], []

print("[INFO] Loading dataset with augmentation...")

for user_id in os.listdir(dataset_dir):
    user_path = os.path.join(dataset_dir, user_id)
    if not os.path.isdir(user_path):
        continue

    for filename in os.listdir(user_path):
        if not filename.endswith(".png"):
            continue

        parts = filename.split("_")
        if len(parts) < 4:
            continue

        expression = parts[2].lower()
        if expression not in expressions:
            continue

        img_path = os.path.join(user_path, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue

        img = cv2.resize(img, IMG_SIZE)
        augmented_imgs = augment_image(img)

        for aug_img in augmented_imgs:
            X.append(aug_img.flatten())
            y.append(expression)

print(f"[INFO] Total loaded images after augmentation: {len(X)}")

X = np.array(X)
y = np.array(y)

# Encoding
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# KNN Training & Eval
print("[INFO] Training KNN...")
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

print("\n[INFO] Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print("[INFO] Accuracy:", accuracy_score(y_test, y_pred))


[INFO] Loading dataset with augmentation...
[INFO] Total loaded images after augmentation: 5616
[INFO] Training KNN...

[INFO] Classification Report:
              precision    recall  f1-score   support

       angry       0.48      0.66      0.55       279
       happy       0.44      0.50      0.47       279
     neutral       0.62      0.41      0.50       285
         sad       0.54      0.44      0.49       281

    accuracy                           0.50      1124
   macro avg       0.52      0.51      0.50      1124
weighted avg       0.52      0.50      0.50      1124

[INFO] Accuracy: 0.5044483985765125
