In [24]:
import cv2
import glob
import random
import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from scipy.stats import randint as sp_randint
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

In [25]:
metascore = []

In [32]:
emotions = ["neutral", "anger", "contempt", "disgust", "fear", "happy", "sadness", "surprise"]
#emotions = ["happy", "surprise"]

In [33]:
clf = make_pipeline(PCA(n_components=800,
                        svd_solver='full',
                        ),
                    RandomForestClassifier(n_jobs=-1, 
                                           n_estimators=400,
                                           max_features=0.5, 
                                           max_depth=6,
                                           criterion='entropy', 
                                           bootstrap=False,
                                          )
                    )

In [34]:
def get_files(emotion):
    files = glob.glob("base1\dataset\\{}\\*".format(emotion))
    random.shuffle(files)
    training = files[:int(len(files) * 0.7)]
    test = files[-int(len(files) * 0.3):]
    return training, test

In [35]:
def make_sets():
    training_X = []
    training_y = []
    test_X = []
    test_y = []

    for emotion in emotions:

        training, test = get_files(emotion)

        for item in training:
            image = cv2.imread(item)
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            training_X.append(gray)
            training_y.append(emotions.index(emotion))

        for item in test:
            image = cv2.imread(item)
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            test_X.append(gray)
            test_y.append(emotions.index(emotion))

    training_X = np.array(training_X)
    training_y = np.array(training_y)
    test_X = np.array(test_X)
    test_y = np.array(test_y)

    return training_X, training_y, test_X, test_y

In [36]:
training_X, training_y, test_X, test_y = make_sets()

training_X = training_X.reshape(training_X.shape[0], 350 * 350)
test_X = test_X.reshape(test_X.shape[0], 350 * 350)

In [None]:
for i in range(0, 10):
    
    clf = clf.fit(training_X, training_y)
    predict = clf.predict(test_X)
    score = clf.score(test_X, test_y)
    metascore.append(score)
    
metascore = np.array(metascore)

print ('\033[1m Score: ' + '\033[1;34m {} \n'.format(metascore.mean()*100) + '\033[0m')
print ('\033[1m Margin of Erro: ' + '\033[1;34m {} \n'.format(metascore.std()*100) + '\033[0m')

metascore = []

In [17]:
cm = confusion_matrix(test_y, predict, labels=[0, 1, 2, 3, 4, 5, 6, 7])

print('\033[1m Matriz de confusao:\n\n' + '\033[0m {} \n'.format(cm))

[1m Matriz de confusao:

[0m [[286   0   0   0   0   1   0   2]
 [ 39   0   0   0   0   0   0   0]
 [ 14   0   0   0   0   1   0   0]
 [ 52   0   0   0   0   1   0   0]
 [ 20   0   0   0   0   1   0   0]
 [ 29   0   0   0   0  32   0   0]
 [ 30   0   0   0   0   0   2   0]
 [ 43   0   0   0   0   0   0  25]] 



In [9]:
param_dist = {"randomforestclassifier__max_depth": [5, 6, 7],
                  "randomforestclassifier__max_features": [.5, .75, 1. ],
                  "randomforestclassifier__n_estimators": [400, 500, 600],
                  "randomforestclassifier__bootstrap" :  [True, False],
                  "randomforestclassifier__n_jobs" : [-1],
                  "randomforestclassifier__criterion": ["gini", "entropy"],
                  "pca__svd_solver" : ["full"],
                  "pca__n_components" : [500, 700]
                  }

grid = RandomizedSearchCV(estimator=clf, param_distributions=param_dist, n_iter=100)
grid.fit(training_X, training_y)

y_true, y_pred = test_y, grid.predict(test_X)

bs = grid.best_score_
bp = grid.best_params_
cr = classification_report(y_true, y_pred)

print('\033[1m Best Score: ' + '\033[1;34m {}\n'.format(bs) + '\033[0m')
print('\033[1m Best Params:\n\n' + '\033[0m {}\n'.format(bp))
print('\033[1m Classification Report:\n\n' + '\033[0m {}'.format(cr))

[1m Best Score: [1;34m 0.6557017543859649
[0m
[1m Best Params:

[0m {'randomforestclassifier__n_jobs': -1, 'randomforestclassifier__n_estimators': 400, 'randomforestclassifier__max_features': 0.5, 'randomforestclassifier__max_depth': 6, 'randomforestclassifier__criterion': 'entropy', 'randomforestclassifier__bootstrap': False, 'pca__svd_solver': 'full', 'pca__n_components': 700}

[1m Classification Report:

[0m              precision    recall  f1-score   support

          0       0.59      0.98      0.74        98
          1       0.00      0.00      0.00        13
          2       0.00      0.00      0.00         5
          3       0.00      0.00      0.00        18
          4       0.00      0.00      0.00         7
          5       0.82      0.70      0.76        20
          6       0.00      0.00      0.00         8
          7       1.00      0.54      0.70        24

avg / total       0.51      0.64      0.54       193



  'precision', 'predicted', average, warn_for)


In [20]:
accuracy = accuracy_score(predict, test_y)
    
print("\n\033[1m Acuracy: \033[1;34m %1.2f%% \033[0m" % (accuracy*100))


[1m Acuracy: [1;34m 66.32% [0m
