In [57]:
# source code: https://www.kaggle.com/code/pierre54/bag-of-words-model-with-sift-descriptors/notebook

import cv2
import numpy as np
import os
import pandas as pd
import csv
import matplotlib.pyplot as plt

from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import zero_one_loss as J01

np.random.seed(1234)

**1. Initialize Dataset, Collect Target Emotions**

In [21]:
img_path = 'facial_expressions-master/images/'
train = pd.read_csv('facial_expressions-master/data/legend-fixed.csv').sample(n = 10000)
emotions = train.emotion.sort_values().unique()
#emotions = np.array(['anger', 'contempt', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise'])
print(emotions, len(emotions))
dico = []

['anger' 'contempt' 'disgust' 'fear' 'happiness' 'neutral' 'sadness'
 'surprise'] 8


**2. Find Keypoints**

In [22]:
sift = cv2.SIFT_create()

for leaf in train.image:
    img = cv2.imread(img_path + str(leaf))
    kp, des = sift.detectAndCompute(img, None)

    for d in des:
        dico.append(d)

**3. Use K-Means Clustering**

In [23]:
k = np.size(emotions) * 10

batch_size = np.size(os.listdir(img_path)) * 3

k_means = KMeans(init="k-means++", n_clusters=k, n_init=10)
k_means.fit(dico)

**4. Build Histograms**

In [24]:
k_means.verbose = False

histo_list = []

for leaf in train.image:
    img = cv2.imread(img_path + str(leaf))
    kp, des = sift.detectAndCompute(img, None)

    histo = np.zeros(k)
    nkp = np.size(kp)

    for d in des:
        idx = k_means.predict([d])
        histo[idx] += 1/nkp # to normalize histogram, divide by nkp

    histo_list.append(histo)

**5. Train NN**

In [25]:
X = np.array(histo_list)
Y = []

# associate emotion to an integer
for e in train.emotion:
    Y.append(np.min(np.nonzero(emotions == e)))

mlp = MLPClassifier(max_iter=600000)
mlp.fit(X, Y)

**6. Test NN**

In [55]:
test = pd.read_csv('facial_expressions-master/data/legend-fixed.csv').sample(3000)
Y_res = []
Y_va = []

for e in test.emotion:
    Y_va.append(np.min(np.nonzero(emotions == e)))

for leaf in test.image:
    img = cv2.imread(img_path + str(leaf))
    kp, des = sift.detectAndCompute(img, None)

    x = np.zeros(k)
    nkp = np.size(kp)

    for d in des:
        idx = k_means.predict([d])
        x[idx] += 1/nkp
    Y_res.append(mlp.predict([x]))


In [56]:
print(f"Validation Error Rate: {J01(Y_res, Y_va)}")

Validation Error Rate: 0.29700000000000004
