# Análise dos Classificadores

Esse notebook tem como objetivo realizar uma análise da classificação das características obtidas com os descritores de cor utilizando a biblioteca LazyPredict. 

A biblioteca citada faz uma predição das caracteristicas utilizando vários classificadores e identifica os com maior acurácia.

##### Carregar bibliotecas a serem utilizadas

In [64]:
# open and manipulate imgs
import glob
import cv2

import numpy as np

# dominant color
from sklearn.cluster import KMeans

# color moments
from scipy.stats import skew, kurtosis

# Lazy Prediction conversion
import pandas as pd

# ignore warnings
import warnings

warnings.filterwarnings('ignore')

##### Lazy Predictions

In [65]:
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split

##### Carregar Imagens

In [66]:
DIR_IMGS_NORMAIS  = "./data/olhos/normal"
DIR_IMGS_PROBLEMA = "./data/olhos/problema"

DIR_MARCACOES_NORMAIS  = "./data/marcacoes/normal"
DIR_MARCACOES_PROBLEMA = "./data/marcacoes/normal"

In [67]:
def load_images(path, label):
    imgs, labels = [], []
    
    # select every .jpg file from directory
    for file in glob.iglob(path + '/*.jpg'):
    
        img  = cv2.imread(file)
        img  = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                   
        imgs.append(img)
        labels.append(label)
    
    return imgs, labels

In [68]:
n_imgs, n_labels = load_images(DIR_IMGS_NORMAIS, 0)
p_imgs, p_labels = load_images(DIR_IMGS_PROBLEMA, 1)

imgs   = n_imgs + p_imgs
labels = n_labels + p_labels

##### Criação do Lazy Predict

In [69]:
def iniciate_lazy(features, labels, test_size):
    features = pd.DataFrame(features)
    
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size = test_size, random_state = 42)
    
    clf = LazyClassifier(predictions = True)
    models, predictions = clf.fit(X_train, X_test, y_train, y_test)
    
    return models, predictions

## Melhores Classificadores para Histograma de Cor

In [70]:
def color_histogram(img):
    hist = []
    
    for channel in cv2.split(img):
        hist.extend(cv2.calcHist([channel], [0], None, [256], [0, 256]))
    
    hist = np.concatenate(np.array(hist), axis = 0)
    
    return hist

In [80]:
features_hist = []

# extract features
for img in imgs:
    feats = color_histogram(img)
    features_hist.append(feats)

### 90 / 10

In [81]:
model, predictions = iniciate_lazy(features_hist, labels, 0.1)
model[:7]

 90%|█████████████████████████████████████████████████████████████████████████▊        | 27/30 [00:03<00:00,  9.13it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:04<00:00,  6.71it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AdaBoostClassifier,0.83,0.88,0.88,0.84,0.66
BernoulliNB,0.83,0.88,0.88,0.84,0.06
ExtraTreesClassifier,0.92,0.88,0.88,0.91,0.28
RidgeClassifierCV,0.83,0.81,0.81,0.83,0.06
RidgeClassifier,0.83,0.81,0.81,0.83,0.07
RandomForestClassifier,0.83,0.81,0.81,0.83,0.42
GaussianNB,0.83,0.81,0.81,0.83,0.05


### 80 / 20

In [82]:
model, predictions = iniciate_lazy(features_hist, labels, 0.2)
model[:7]

 90%|█████████████████████████████████████████████████████████████████████████▊        | 27/30 [00:03<00:00,  9.36it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:04<00:00,  7.26it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BernoulliNB,0.79,0.81,0.81,0.79,0.06
LinearDiscriminantAnalysis,0.83,0.8,0.8,0.83,0.06
Perceptron,0.79,0.79,0.79,0.79,0.06
XGBClassifier,0.83,0.78,0.78,0.82,0.44
SGDClassifier,0.79,0.77,0.77,0.79,0.06
GaussianNB,0.79,0.77,0.77,0.79,0.04
PassiveAggressiveClassifier,0.75,0.76,0.76,0.75,0.09


### 70 / 30

In [83]:
model, predictions = iniciate_lazy(features_hist, labels, 0.3)
model[:7]

 87%|███████████████████████████████████████████████████████████████████████           | 26/30 [00:03<00:00,  8.99it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:03<00:00,  7.58it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Perceptron,0.86,0.87,0.87,0.87,0.06
LinearDiscriminantAnalysis,0.89,0.8,0.8,0.88,0.08
XGBClassifier,0.89,0.8,0.8,0.88,0.47
RidgeClassifierCV,0.83,0.79,0.79,0.83,0.07
RidgeClassifier,0.83,0.79,0.79,0.83,0.05
LogisticRegression,0.83,0.79,0.79,0.83,0.12
BernoulliNB,0.78,0.78,0.78,0.79,0.06


## Melhores Classificadores para Cores Dominantes

In [76]:
def dominant_color(img):
    
    img = img.reshape((img.shape[0] * img.shape[1],3))
    clt = KMeans(n_clusters=3)
    clt.fit(img)

    dominant = np.concatenate(clt.cluster_centers_, axis = 0)
    
    return dominant

In [75]:
features_dom = []

# extract features
for img in imgs:
    feats = dominant_color(img)
    features_dom.append(feats)

### 90 / 10

In [77]:
model, predictions = iniciate_lazy(features_dom, labels, 0.1)
model[:7]

 90%|█████████████████████████████████████████████████████████████████████████▊        | 27/30 [00:01<00:00, 15.08it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:01<00:00, 19.22it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LogisticRegression,0.83,0.75,0.75,0.81,0.04
KNeighborsClassifier,0.83,0.75,0.75,0.81,0.02
LinearDiscriminantAnalysis,0.75,0.69,0.69,0.74,0.03
Perceptron,0.75,0.69,0.69,0.74,0.03
LinearSVC,0.75,0.69,0.69,0.74,0.03
RidgeClassifier,0.75,0.69,0.69,0.74,0.03
RidgeClassifierCV,0.75,0.69,0.69,0.74,0.03


### 80 / 20

In [78]:
model, predictions = iniciate_lazy(features_dom, labels, 0.2)
model[:7]

 90%|█████████████████████████████████████████████████████████████████████████▊        | 27/30 [00:01<00:00, 14.57it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:01<00:00, 18.52it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AdaBoostClassifier,0.83,0.78,0.78,0.82,0.16
SGDClassifier,0.83,0.78,0.78,0.82,0.03
LinearSVC,0.79,0.74,0.74,0.78,0.03
PassiveAggressiveClassifier,0.79,0.74,0.74,0.78,0.02
RidgeClassifier,0.75,0.69,0.69,0.73,0.02
Perceptron,0.67,0.64,0.64,0.67,0.05
DecisionTreeClassifier,0.71,0.63,0.63,0.67,0.03


### 70 / 30

In [79]:
model, predictions = iniciate_lazy(features_dom, labels, 0.3)
model[:7]

100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:01<00:00, 21.05it/s]






Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SGDClassifier,0.83,0.79,0.79,0.83,0.03
LinearSVC,0.86,0.78,0.78,0.85,0.03
AdaBoostClassifier,0.83,0.7,0.7,0.81,0.17
DecisionTreeClassifier,0.69,0.63,0.63,0.7,0.02
RidgeClassifierCV,0.78,0.63,0.63,0.74,0.02
RidgeClassifier,0.78,0.63,0.63,0.74,0.02
Perceptron,0.78,0.63,0.63,0.74,0.02


## Melhores Classificadores para Momentos de Cor

In [84]:
def color_moments(img):
    
    moments = []
    
    for ch in cv2.split(img):
        
        # first color moment
        mean      = np.mean(ch)
        moments.append(mean)
        
        # second color moment
        variance  = np.var(ch)
        moments.append(variance)

        # third color moment
        skewness  = skew(ch.reshape(-1))
        moments.append(skewness)
        
        # forth color moment
        kurt      = kurtosis(ch.reshape(-1))
        moments.append(kurt)
    
    moments = np.array(moments)
    
    return moments

In [85]:
features_mom = []

# extract features
for img in imgs:
    feats = color_moments(img)
    features_mom.append(feats)

### 90 / 10

In [87]:
model, predictions = iniciate_lazy(features_mom, labels, 0.1)
model[:7]

100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:01<00:00, 18.72it/s]






Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LinearDiscriminantAnalysis,0.92,0.94,0.94,0.92,0.03
RidgeClassifierCV,0.92,0.94,0.94,0.92,0.02
DecisionTreeClassifier,0.92,0.94,0.94,0.92,0.03
RidgeClassifier,0.92,0.94,0.94,0.92,0.02
LabelPropagation,0.92,0.94,0.94,0.92,0.02
LabelSpreading,0.92,0.94,0.94,0.92,0.04
XGBClassifier,0.92,0.88,0.88,0.91,0.12


### 80 / 20

In [88]:
model, predictions = iniciate_lazy(features_mom, labels, 0.2)
model[:7]

 97%|███████████████████████████████████████████████████████████████████████████████▎  | 29/30 [00:01<00:00, 14.93it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:01<00:00, 18.76it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CalibratedClassifierCV,0.83,0.82,0.82,0.83,0.06
RidgeClassifier,0.83,0.82,0.82,0.83,0.02
Perceptron,0.83,0.82,0.82,0.83,0.03
PassiveAggressiveClassifier,0.83,0.82,0.82,0.83,0.02
LinearSVC,0.83,0.82,0.82,0.83,0.03
LabelPropagation,0.83,0.8,0.8,0.83,0.02
ExtraTreesClassifier,0.83,0.8,0.8,0.83,0.21


### 70 / 30

In [89]:
model, predictions = iniciate_lazy(features_mom, labels, 0.3)
model[:7]

 97%|███████████████████████████████████████████████████████████████████████████████▎  | 29/30 [00:01<00:00, 15.58it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:01<00:00, 17.37it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LinearSVC,0.89,0.86,0.86,0.89,0.03
SGDClassifier,0.89,0.86,0.86,0.89,0.05
RidgeClassifier,0.89,0.86,0.86,0.89,0.05
LinearDiscriminantAnalysis,0.86,0.84,0.84,0.86,0.03
RidgeClassifierCV,0.86,0.84,0.84,0.86,0.05
AdaBoostClassifier,0.89,0.83,0.83,0.88,0.16
CalibratedClassifierCV,0.86,0.81,0.81,0.86,0.06


## Todos os descritores concatenados

In [90]:
features_all = []

# extract features
for img in imgs:
    feats = np.concatenate((color_histogram(img), dominant_color(img), color_moments(img)), axis=None)
    features_all.append(feats)

### 90 / 10

In [91]:
model, predictions = iniciate_lazy(features_all, labels, 0.1)
model[:7]

 87%|███████████████████████████████████████████████████████████████████████           | 26/30 [00:03<00:00,  8.11it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:04<00:00,  6.56it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ExtraTreeClassifier,1.0,1.0,1.0,1.0,0.05
ExtraTreesClassifier,0.92,0.88,0.88,0.91,0.29
BernoulliNB,0.83,0.88,0.88,0.84,0.07
BaggingClassifier,0.92,0.88,0.88,0.91,0.28
LinearDiscriminantAnalysis,0.83,0.81,0.81,0.83,0.12
RidgeClassifierCV,0.83,0.81,0.81,0.83,0.08
RidgeClassifier,0.83,0.81,0.81,0.83,0.07


### 80 / 20

In [92]:
model, predictions = iniciate_lazy(features_all, labels, 0.2)
model[:7]

 90%|█████████████████████████████████████████████████████████████████████████▊        | 27/30 [00:03<00:00,  9.02it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:04<00:00,  6.87it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BernoulliNB,0.79,0.81,0.81,0.79,0.06
XGBClassifier,0.83,0.78,0.78,0.82,0.56
RidgeClassifierCV,0.79,0.77,0.77,0.79,0.09
GaussianNB,0.79,0.77,0.77,0.79,0.05
LGBMClassifier,0.79,0.74,0.74,0.78,0.35
ExtraTreesClassifier,0.79,0.74,0.74,0.78,0.29
LogisticRegression,0.75,0.73,0.73,0.75,0.13


### 70 / 30

In [93]:
model, predictions = iniciate_lazy(features_all, labels, 0.3)
model[:7]

 87%|███████████████████████████████████████████████████████████████████████           | 26/30 [00:03<00:00,  8.28it/s]



100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:04<00:00,  7.31it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XGBClassifier,0.89,0.8,0.8,0.88,0.53
RidgeClassifierCV,0.83,0.79,0.79,0.83,0.08
RidgeClassifier,0.83,0.79,0.79,0.83,0.06
LogisticRegression,0.83,0.79,0.79,0.83,0.13
BernoulliNB,0.78,0.78,0.78,0.79,0.06
LinearDiscriminantAnalysis,0.86,0.78,0.78,0.85,0.11
ExtraTreesClassifier,0.86,0.78,0.78,0.85,0.29
