###  Dataset
---
We used this [face dataset](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data), which consists of 48x48 grayscale images of faces. Each image is labeled with one of 7 emotion categories: 0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral.

An example of a face in this dataset:
<img src="Images/ExampleFace.jpg" alt="Example Face" width="200">


In [82]:
#imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
from PIL import Image
import matplotlib.pyplot as plt

In [83]:
# Load the raw CSV and discard the ' Usage' column in one step
# (note: the ' Usage' and ' pixels' column names carry a leading space).
total = pd.read_csv('icml_face_data.csv').drop(columns=[' Usage'])

# Labels: the integer emotion code of each row.
total_y = total['emotion']
# Features: every ' pixels' entry is a space-separated string; expand it into
# one column per value and store the values as uint8.
total_x = total[' pixels'].str.split(' ', expand=True).astype('uint8')

# Hold out 25% of the rows for testing; the seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    total_x,
    total_y,
    test_size=0.25,
    random_state=42,
)

print("Training Components: " + str(len(y_train)))
print("Test Components: " + str(len(y_test)))

Training Components: 26915
Test Components: 8972


In [101]:
# Min-max scaling: learn the feature ranges from the training split only,
# then apply that same fitted mapping to both splits so no information from
# the test split influences the scaler.
sc = MinMaxScaler().fit(x_train)

x_train_sc = sc.transform(x_train)
x_test_sc = sc.transform(x_test)


###  PCA

In [103]:
# applying pca
#
# Project the scaled pixels onto 255 principal components
# (original note: 255 components explain 95% of the variance, 881 explain 99%).
# The PCA is fitted on the training split only and then applied to both splits.
#
# random_state is pinned because, with svd_solver left at its 'auto' default,
# scikit-learn may select the randomized SVD solver for data of this size;
# without a seed the components (and everything downstream) can differ
# slightly from run to run.
pca = PCA(n_components=255, random_state=42).fit(x_train_sc)
x_train_pca = pca.transform(x_train_sc)
x_test_pca = pca.transform(x_test_sc)

# Fraction of the total variance captured by each retained component.
explained_variance = pca.explained_variance_ratio_


In [105]:
# logistic regression results
#
# The pipeline owns the PCA step, so it must be fed the *scaled* pixels
# (x_train_sc / x_test_sc), not the already-projected x_*_pca arrays.
# Feeding it x_train_pca would apply PCA a second time and, because Pipeline
# fits its steps in place, would overwrite the shared `pca` object with a
# projection fitted on 255-dimensional data.
#
# random_state is set because the 'saga' solver shuffles the data; pinning
# the seed makes the reported metrics reproducible.
lrc = LogisticRegression(
    C=0.01,
    penalty='l2',
    solver='saga',
    tol=0.0001,
    max_iter=800,
    random_state=42,
)

pipe = Pipeline([('pca', pca), ('logistic', lrc)])
pipe.fit(x_train_sc, y_train)
predictions = pipe.predict(x_test_sc)

# Report from the pipeline's own predictions so the classifier is evaluated
# on features produced by exactly the same transform it was trained on.
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

[[ 173    0   75  449  238  105  195]
 [  14    3   25   37   23    8   20]
 [ 107    0  166  417  232  174  208]
 [  93    0   89 1578  246   85  157]
 [ 124    0   95  465  445   78  292]
 [  32    0   78  221   89  467  103]
 [  75    0   74  519  281   92  525]]
              precision    recall  f1-score   support

           0       0.28      0.14      0.19      1235
           1       1.00      0.02      0.05       130
           2       0.28      0.13      0.17      1304
           3       0.43      0.70      0.53      2248
           4       0.29      0.30      0.29      1499
           5       0.46      0.47      0.47       990
           6       0.35      0.34      0.34      1566

    accuracy                           0.37      8972
   macro avg       0.44      0.30      0.29      8972
weighted avg       0.36      0.37      0.34      8972

