In [18]:
import cv2
import glob
import random
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from scipy.stats import randint as sp_randintb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

In [19]:
metascore = []

In [27]:
emotions = ["neutral", "anger", "contempt", "disgust", "fear", "happy", "sadness", "surprise"]
#emotions = ["happy", "surprise"]

In [28]:
clf = RandomForestClassifier(n_estimators=600,
                             min_samples_split=12,
                             min_samples_leaf=5,
                             max_features=15,
                             max_depth=7,
                             criterion='gini', 
                             bootstrap=False)

In [29]:
def get_files(emotion):
    files = glob.glob("base1\dataset\\{}\\*".format(emotion))
    random.shuffle(files)
    training = files[:int(len(files) * 0.7)]
    test = files[-int(len(files) * 0.3):]
    return training, test

In [30]:
def make_sets():
    training_X = []
    training_y = []
    test_X = []
    test_y = []

    for emotion in emotions:

        training, test = get_files(emotion)

        for item in training:
            image = cv2.imread(item)
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            training_X.append(gray)
            training_y.append(emotions.index(emotion))

        for item in test:
            image = cv2.imread(item)
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            test_X.append(gray)
            test_y.append(emotions.index(emotion))

    training_X = np.array(training_X)
    training_y = np.array(training_y)
    test_X = np.array(test_X)
    test_y = np.array(test_y)

    return training_X, training_y, test_X, test_y

In [31]:
training_X, training_y, test_X, test_y = make_sets()

training_X = training_X.reshape(training_X.shape[0], 350 * 350)
test_X = test_X.reshape(test_X.shape[0], 350 * 350)

In [32]:
for i in range(0, 10):
    clf = clf.fit(training_X, training_y)
    predict = clf.predict(test_X)
    score = clf.score(test_X, test_y)
    metascore.append(score)

metascore = np.array(metascore)
    
print ('\033[1m Score: ' + '\033[1;34m {} \n'.format(metascore.mean()*100) + '\033[0m')
print ('\033[1m Margin of Erro: ' + '\033[1;34m {} \n'.format(metascore.std()*100) + '\033[0m')

metascore = []

[1m Score: [1;34m 68.34196891191709 
[0m
[1m Margin of Erro: [1;34m 0.43039501880404685 
[0m


In [33]:
cm = confusion_matrix(test_y, predict, labels=[0, 1, 2, 3, 4, 5, 6, 7])
#cm = confusion_matrix(test_y, predict)

print('\033[1m Matriz de confusao:\n\n' + '\033[0m {} \n'.format(cm))

[1m Matriz de confusao:

[0m [[98  0  0  0  0  0  0  0]
 [13  0  0  0  0  0  0  0]
 [ 5  0  0  0  0  0  0  0]
 [16  0  0  1  0  1  0  0]
 [ 7  0  0  0  0  0  0  0]
 [ 7  0  0  0  0 13  0  0]
 [ 8  0  0  0  0  0  0  0]
 [ 3  0  0  0  0  0  0 21]] 



In [84]:
param_dist = {"max_depth" :  [30, 40, 50],
                  "max_features" : [15, 20, 25, 30],
                  "min_samples_split" : [12, 16, 18, 20],
                  "min_samples_leaf" : [5, 8, 12],
                  "n_estimators" : [600, 800, 1200],
                  "bootstrap" : [True, False],
                  "criterion" : ["gini", "entropy"]
                  }

grid = RandomizedSearchCV(estimator=clf, param_distributions = param_dist, n_iter=100, n_jobs=-1)
grid.fit(training_X, training_y)

y_true, y_pred = test_y, grid.predict(test_X)

bs = grid.best_score_
bp = grid.best_params_
cr = classification_report(y_true, y_pred)

print('\033[1m Best Score: ' + '\033[1;34m {}\n'.format(bs) + '\033[0m')
print('\033[1m Best Params:\n\n' + '\033[0m {}\n'.format(bp))
print('\033[1m Classification Report:\n\n' + '\033[0m {}'.format(cr))

[1m Best Score: [1;34m 0.6842105263157895
[0m
[1m Best Params:

[0m {'n_estimators': 1000, 'min_samples_split': 16, 'min_samples_leaf': 5, 'max_features': 30, 'max_depth': 30, 'criterion': 'gini', 'bootstrap': False}

[1m Classification Report:

[0m              precision    recall  f1-score   support

          0       0.64      1.00      0.78        98
          1       0.00      0.00      0.00        13
          2       0.00      0.00      0.00         5
          3       1.00      0.22      0.36        18
          4       0.00      0.00      0.00         7
          5       0.94      0.85      0.89        20
          6       0.00      0.00      0.00         8
          7       1.00      0.71      0.83        24

avg / total       0.64      0.70      0.62       193



  'precision', 'predicted', average, warn_for)


In [None]:
param_grid = {'n_estimators': [200, 500, 700],
              'max_features': ['auto', 'sqrt', 'log2'],
              'max_depth': [4, 5, 6, 7, 8],
              'criterion': ['gini', 'entropy'],
              'min_samples_split': [10, 20, 30, 40, 50]
             }

grid = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5)
grid.fit(training_X, training_y)

y_true, y_pred = test_y, grid.predict(test_X)

bs = grid.best_score_
bp = grid.best_params_
cr = classification_report(y_true, y_pred)

print('\033[1m Best Score: ' + '\033[1;34m {}\n'.format(bs) + '\033[0m')
print('\033[1m Best Params:\n\n' + '\033[0m {}\n'.format(bp))
print('\033[1m Classification Report:\n\n' + '\033[0m {}'.format(cr))

In [83]:
accuracy = accuracy_score(predict, test_y)

print("\n\033[1m Acuracy: \033[1;34m %1.2f%% \033[0m" % accuracy)

Accuracy: 0.6787564766839378
