In [1]:
# Mount "My Drive" into /content/drive
from google.colab import drive

google_drive_dir = "Final_project"  # @param
#bml-notebooks/
drive.mount('/content/drive')

mount_point = "/content/drive/My Drive/{}".format(google_drive_dir)

# Change the root directory to your mount_point
% cd '$mount_point'

Mounted at /content/drive
/content/drive/My Drive/Final_project


# <span style="color: royalblue;">Load Data</span>
Load the image data with labels.

In [2]:
import numpy as np
from src.loadData import loadFer2013
from keras.utils import to_categorical


In [3]:
X_train,X_val,X_test,y_train,y_val,y_test = loadFer2013('fer2013/icml_face_data.csv')

y_train_onehot = to_categorical(y_train)
y_val_onehot = to_categorical(y_val)
y_test_onehot = to_categorical(y_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['emotion'][data['emotion']>1]-=1


In [4]:
y_train_onehot.shape, y_val_onehot.shape, y_test_onehot.shape

((28709, 6), (3589, 6), (3589, 6))

In [5]:
X_train.shape, y_train.shape

((28709, 48, 48), (28709,))

In [6]:
emotions = {0: 'Angry', 1:'Fear', 2: 'Happy', 3: 'Sad', 4: 'Surprise', 5: 'Neutral'}

In [7]:
class_names = list(emotions.values())
class_names

['Angry', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Extract features

In [8]:
X_val.shape

(3589, 48, 48)

In [9]:
X_data = np.vstack((X_train,X_val))
X_data.shape

(32298, 48, 48)

In [10]:
X_data = X_data.reshape(32298,-1)
X_data.shape

(32298, 2304)

In [11]:
X_test.shape

(3589, 48, 48)

In [12]:
h,w = X_train[0].shape
h,w

(48, 48)

In [13]:
import time
from sklearn.decomposition import PCA

n_components = 150

print("Extracting the top %d eigenfaces from %d faces"
      % (n_components, X_data.shape[0]))
t0 = time.time()
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True).fit(X_data)
print("done in %0.3fs" % (time.time() - t0))

eigenfaces = pca.components_.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time.time()
X_train_pca = pca.transform(X_data)
X_test_pca = pca.transform(X_test.reshape(3589,-1))
print("done in %0.3fs" % (time.time() - t0))

Extracting the top 150 eigenfaces from 32298 faces
done in 16.890s
Projecting the input data on the eigenfaces orthonormal basis
done in 1.092s


# Random Forest Classification

In [30]:
y_train_onehot.shape

(28709, 6)

In [31]:
y_label = np.vstack((y_train_onehot,y_val_onehot))
y_label.shape

(32298, 6)

In [33]:
from sklearn.ensemble import RandomForestClassifier
import time
RF = RandomForestClassifier(max_depth=25, random_state=42)
st = time.time()
RF.fit(X_train_pca, y_label)
print(time.time()-st)
RF.score(X_train_pca, y_label)

70.56053924560547


0.9972753730881169

In [18]:
from sklearn.ensemble import RandomForestClassifier
import time
from sklearn.model_selection import GridSearchCV
parameters = {'max_depth':list(range(2,20,5))}
RF = RandomForestClassifier()

clf = GridSearchCV(RF, parameters)
clf.fit(X_train_pca, y_label)

GridSearchCV(cv=None, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              ra

In [19]:
clf.best_params_

{'max_depth': 17}

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time.time()
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
clf = GridSearchCV(
    SVC(kernel='rbf', class_weight='balanced'), param_grid
)
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

Fitting the classifier to the training set


#Evaluation

In [40]:
# Quantitative evaluation of the model quality on the test set
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

print("Predicting people's names on the test set")
t0 = time.time()
y_pred = RF.predict(X_test_pca)
print("done in %0.3fs" % (time.time() - t0))

print(classification_report(y_test_onehot, y_pred, target_names=class_names))
# print(confusion_matrix(y_test_onehot, y_pred, labels=range(len(class_names))))

Predicting people's names on the test set
done in 0.248s
              precision    recall  f1-score   support

       Angry       0.90      0.07      0.14       467
        Fear       0.96      0.12      0.22       552
       Happy       0.93      0.08      0.15       895
         Sad       0.94      0.04      0.08       653
    Surprise       0.98      0.31      0.47       415
     Neutral       0.89      0.05      0.10       607

   micro avg       0.95      0.10      0.18      3589
   macro avg       0.93      0.11      0.19      3589
weighted avg       0.93      0.10      0.17      3589
 samples avg       0.10      0.10      0.10      3589



  _warn_prf(average, modifier, msg_start, len(result))


In [41]:
y_pred.shape

(3589, 6)

In [42]:
y_test_onehot.shape

(3589, 6)

In [22]:
from sklearn import metrics
print(metrics.accuracy_score(y_test, y_pred))
print(metrics.f1_score(y_test, y_pred,average='micro'))


0.4329896907216495
0.43298969072164945


In [None]:
import pickle
pickle.dump(svm, open('saved_model/eignface_svmC1.pkl', 'wb'))