In [13]:
import cv2
import glob
import random
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from scipy.stats import randint as sp_randintb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV

In [22]:
metascore = []

In [29]:
emotions = ["neutral", "anger", "contempt", "disgust", "fear", "happy", "sadness", "surprise"]
#emotions = ["happy", "surprise"]

In [30]:
clf = DecisionTreeClassifier(splitter='best',
                             min_samples_split=20,
                             min_samples_leaf=8,
                             max_features=20,
                             max_depth=40,
                             criterion='gini'
                            )

In [31]:
def get_files(emotion):
    files = glob.glob("base1\dataset\\{}\\*".format(emotion))
    random.shuffle(files)
    training = files[:int(len(files) * 0.7)]
    test = files[-int(len(files) * 0.3):]
    return training, test

In [32]:
def make_sets():
    training_X = []
    training_y = []
    test_X = []
    test_y = []

    for emotion in emotions:

        training, test = get_files(emotion)

        for item in training:
            image = cv2.imread(item)
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            training_X.append(gray)
            training_y.append(emotions.index(emotion))

        for item in test:
            image = cv2.imread(item)
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            test_X.append(gray)
            test_y.append(emotions.index(emotion))

    training_X = np.array(training_X)
    training_y = np.array(training_y)
    test_X = np.array(test_X)
    test_y = np.array(test_y)

    return training_X, training_y, test_X, test_y

In [33]:
training_X, training_y, test_X, test_y = make_sets()

training_X = training_X.reshape(training_X.shape[0], 350 * 350)
test_X = test_X.reshape(test_X.shape[0], 350 * 350)

In [34]:
for i in range(0, 10):
    clf = clf.fit(training_X, training_y)
    predict = clf.predict(test_X)
    score = clf.score(test_X, test_y)
    metascore.append(score)

metascore = np.array(metascore)
    
print ('\033[1m Score: ' + '\033[1;34m {} \n'.format(metascore.mean()*100) + '\033[0m')
print ('\033[1m Margin of Erro: ' + '\033[1;34m {} \n'.format(metascore.std()*100) + '\033[0m')

metascore = []

[1m Score: [1;34m 56.683937823834206 
[0m
[1m Margin of Erro: [1;34m 2.2128659589702204 
[0m


In [None]:
cm = confusion_matrix(test_y, predict, labels=[0, 1, 2, 3, 4, 5, 6, 7])
#cm = confusion_matrix(test_y, predict)

print('\033[1m Matriz de confusao:\n\n' + '\033[0m {} \n'.format(cm))

In [12]:
param_dist = {"max_depth" :  [30, 40, 50],
                  "max_features" : [15, 20, 25, 30],
                  "min_samples_split" : [12, 16, 18, 20],
                  "min_samples_leaf" : [5, 8, 12],
                  "splitter" : ['best'],
                  "criterion" : ["gini", "entropy"]
                  }

grid = RandomizedSearchCV(estimator=clf, param_distributions = param_dist, n_iter=100, n_jobs=-1)
grid.fit(training_X, training_y)

y_true, y_pred = test_y, grid.predict(test_X)

bs = grid.best_score_
bp = grid.best_params_
cr = classification_report(y_true, y_pred)

print('\033[1m Best Score: ' + '\033[1;34m {}\n'.format(bs) + '\033[0m')
print('\033[1m Best Params:\n\n' + '\033[0m {}\n'.format(bp))
print('\033[1m Classification Report:\n\n' + '\033[0m {}'.format(cr))

[1m Best Score: [1;34m 0.5128581925055107
[0m
[1m Best Params:

[0m {'splitter': 'best', 'min_samples_split': 20, 'min_samples_leaf': 8, 'max_features': 20, 'max_depth': 40, 'criterion': 'gini'}

[1m Classification Report:

[0m              precision    recall  f1-score   support

          0       0.60      0.81      0.69       289
          1       0.00      0.00      0.00        39
          2       0.00      0.00      0.00        15
          3       0.25      0.23      0.24        53
          4       0.12      0.05      0.07        21
          5       0.50      0.51      0.50        61
          6       0.16      0.09      0.12        32
          7       0.65      0.35      0.46        68

avg / total       0.46      0.53      0.48       578

