In [None]:
import glob
import numpy as np
import pandas as pd
from skimage.io import imread, imread_collection
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import time
from IPython.display import clear_output
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the image paths of each class.
# glob returns paths in an arbitrary, filesystem-dependent order, so the lists
# are sorted to keep image indices (and every feature row derived from them)
# reproducible across runs and machines.
negativo = sorted(glob.glob('../input/leishmaniose-lamina/Negativo/*.jpg'))
positivo = sorted(glob.glob('../input/leishmaniose-lamina/Positivo/*.jpg'))

# Single collection: all negative images first, then all positive images.
imagens = imread_collection(negativo+positivo)

In [None]:
# Four sample images, picked by fixed position in the collection
# (the indices are hard-coded, not randomly drawn).
plot = tuple(imagens[indice] for indice in (1, 9, 100, 115))

In [None]:
def exibe_imagens(imgs):
    """Show a sequence of images in a two-column grid of subplots.

    The grid always has at least two rows, so up to four images are drawn on
    the same 2x2, 20x25-inch layout as before. Longer sequences get extra rows
    (the figure height grows proportionally) instead of raising IndexError.

    Parameters
    ----------
    imgs : sequence of array-like
        Images to display; must support ``len`` and integer indexing.
        Each one is rendered with the "gray" colormap.
    """
    n_imgs = len(imgs)
    n_cols = 2
    # Ceil division, but never fewer than two rows (keeps the 2x2 layout).
    n_rows = max(2, -(-n_imgs // n_cols))

    fig, ax = plt.subplots(n_rows, n_cols, figsize=(20, 12.5 * n_rows))
    ax = ax.ravel()

    for i in range(n_imgs):
        ax[i].imshow(imgs[i], cmap="gray")

    # Only the extra rows can leave a dangling empty panel; hide it.
    if n_rows > 2:
        for vazio in ax[n_imgs:]:
            vazio.axis("off")
        
# Render the four sample images stored in `plot`
exibe_imagens(plot)

In [None]:
# Class targets aligned with the image order: 0.0 for every negative path,
# followed by 1.0 for every positive path.
labels = np.repeat([0.0, 1.0], [len(negativo), len(positivo)])
labels

# Descritores
- GLCM
- HOG

# 1. Extraindo características com o descritor GLCM

* Contraste: Mede as variações locais na matriz de coocorrência de níveis de cinza.
* Correlação: Mede a ocorrência de probabilidade conjunta dos pares de pixels especificados.
* Energia: Raiz quadrada do segundo momento angular (ASM), sendo o ASM a soma dos quadrados dos elementos da GLCM. Também interpretada como uma medida de uniformidade.
* Homogeneidade: Mede a proximidade da distribuição dos elementos no GLCM à diagonal do GLCM.

In [None]:
# Pixel-pair distance handed to the GLCM computation in the next cell
d = 1

In [None]:
# scikit-image 0.19 renamed the GLCM helpers (grey* -> gray*) and newer
# releases removed the old spelling, so try the current names first.
try:
    from skimage.feature import graycomatrix, graycoprops
except ImportError:
    from skimage.feature import greycomatrix as graycomatrix
    from skimage.feature import greycoprops as graycoprops

# Offsets averaged to build one co-occurrence matrix per channel
GLCM_ANGLES = [0, np.pi/4, np.pi/2, 3*np.pi/4]
# Texture properties extracted from each averaged matrix, in column order
GLCM_PROPS = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')
N_CHANNELS = 3
n_props = len(GLCM_PROPS)

# One row per image: 6 properties x 3 colour channels = 18 columns
features = np.zeros((len(labels), N_CHANNELS * n_props))
start = time.time()

for id_im, imagem in enumerate(imagens):
    clear_output(wait=True)
    print('Extraindo atributos: {} imagens processadas de {}.'.format(id_im+1, len(imagens)))
    for id_ch in range(N_CHANNELS):

        # One call covers all four angles; the result has shape
        # (levels, levels, 1, 4) with each angle normalised on its own.
        glcm = graycomatrix(imagem[:,:,id_ch], [d], GLCM_ANGLES, normed=True)
        # Average over the angle axis, keeping the 4-D layout graycoprops expects
        matrix = glcm.mean(axis=3, keepdims=True)

        # graycoprops returns a (1, 1) array here; take the scalar explicitly
        # instead of relying on NumPy's deprecated array-to-scalar conversion.
        props = np.array([graycoprops(matrix, nome)[0, 0] for nome in GLCM_PROPS])
        features[id_im, id_ch*n_props:(id_ch+1)*n_props] = props

end = time.time()
print('time = ', end - start)

## 1.1 Visualização dos atributos

In [None]:
# Scatter of two GLCM attributes of the first colour channel, coloured by class.
# Column 1 is 'dissimilarity' and column 2 is 'homogeneity' of channel 0.
fig = plt.figure(figsize=(10,6))
plt.scatter(features[:,1],features[:,2], c=labels)
plt.xlabel('Dissimilaridade (canal 0)')
plt.ylabel('Homogeneidade (canal 0)')
plt.title('GLCM: dissimilaridade x homogeneidade por classe');

In [None]:
from sklearn.decomposition import PCA

# Fit a full-rank PCA on the feature matrix and project the same matrix.
# NOTE(review): the PCA is fitted on every sample, i.e. before the train/test
# split done later in the notebook, so test samples influence the projection —
# confirm this is intended.
pca = PCA().fit(features)
transform = pca.transform(features)

# Share of the total variance captured by the first three components
print(pca.explained_variance_ratio_[:3].sum())

In [None]:
# 3-D scatter of the first three principal components, coloured by class
fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(transform[:,0],transform[:,1], transform[:,2], c=labels)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('PC3');

## 1.2. Classificação

### 1.2.1 Métricas
- acurácia
- precision
- recall
- f1score
- kappa

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score

# Calculo de métricas
def calculo_metricas(y_true, y_pred):
    """Score a set of predictions with the five metrics used in this notebook.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.

    Returns
    -------
    list
        ``[accuracy, precision, recall, f1, kappa]``, each rounded to four
        decimal places.
    """
    avaliadores = (
        accuracy_score,
        precision_score,
        recall_score,
        f1_score,
        cohen_kappa_score,
    )
    valores = [avaliador(y_true, y_pred) for avaliador in avaliadores]
    return list(np.round(valores, 4))

### 1.2.2 Classificadores
- MLPClassifier(hidden_layer_sizes=(100,), activation='relu', *, solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10, max_fun=15000)

    Parameters
    ----------
    
    hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)
        The ith element represents the number of neurons in the ith
        hidden layer.
        
    activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'
        Activation function for the hidden layer.
        - 'identity', no-op activation, useful to implement linear bottleneck,
          returns f(x) = x
        - 'logistic', the logistic sigmoid function,
          returns f(x) = 1 / (1 + exp(-x)).
        - 'tanh', the hyperbolic tan function,
          returns f(x) = tanh(x).
        - 'relu', the rectified linear unit function,
          returns f(x) = max(0, x)
          
    solver : {'lbfgs', 'sgd', 'adam'}, default='adam'
        The solver for weight optimization.
        - 'lbfgs' is an optimizer in the family of quasi-Newton methods.
        - 'sgd' refers to stochastic gradient descent.
        - 'adam' refers to a stochastic gradient-based optimizer proposed
          by Kingma, Diederik, and Jimmy Ba
        Note: The default solver 'adam' works pretty well on relatively
        large datasets (with thousands of training samples or more) in terms of
        both training time and validation score.
        For small datasets, however, 'lbfgs' can converge faster and perform
        better.
        
    alpha : float, default=0.0001
        L2 penalty (regularization term) parameter.
        
    batch_size : int, default='auto'
        Size of minibatches for stochastic optimizers.
        If the solver is 'lbfgs', the classifier will not use minibatch.
        When set to "auto", `batch_size=min(200, n_samples)`
        
    learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'
        Learning rate schedule for weight updates.
        - 'constant' is a constant learning rate given by
          'learning_rate_init'.
        - 'invscaling' gradually decreases the learning rate at each
          time step 't' using an inverse scaling exponent of 'power_t'.
          effective_learning_rate = learning_rate_init / pow(t, power_t)
        - 'adaptive' keeps the learning rate constant to
          'learning_rate_init' as long as training loss keeps decreasing.
          Each time two consecutive epochs fail to decrease training loss by at
          least tol, or fail to increase validation score by at least tol if
          'early_stopping' is on, the current learning rate is divided by 5.
        Only used when ``solver='sgd'``.
        
    learning_rate_init : double, default=0.001
        The initial learning rate used. It controls the step-size
        in updating the weights. Only used when solver='sgd' or 'adam'.
        
    power_t : double, default=0.5
        The exponent for inverse scaling learning rate.
        It is used in updating effective learning rate when the learning_rate
        is set to 'invscaling'. Only used when solver='sgd'.
        
    max_iter : int, default=200
        Maximum number of iterations. The solver iterates until convergence
        (determined by 'tol') or this number of iterations. For stochastic
        solvers ('sgd', 'adam'), note that this determines the number of epochs
        (how many times each data point will be used), not the number of
        gradient steps.
        
    shuffle : bool, default=True
        Whether to shuffle samples in each iteration. Only used when
        solver='sgd' or 'adam'.
        
    random_state : int, RandomState instance, default=None
        Determines random number generation for weights and bias
        initialization, train-test split if early stopping is used, and batch
        sampling when solver='sgd' or 'adam'.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.
        
    tol : float, default=1e-4
        Tolerance for the optimization. When the loss or score is not improving
        by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,
        unless ``learning_rate`` is set to 'adaptive', convergence is
        considered to be reached and training stops.
        
    verbose : bool, default=False
        Whether to print progress messages to stdout.
        
    warm_start : bool, default=False
        When set to True, reuse the solution of the previous
        call to fit as initialization, otherwise, just erase the
        previous solution. See :term:`the Glossary <warm_start>`.
        
    momentum : float, default=0.9
        Momentum for gradient descent update. Should be between 0 and 1. Only
        used when solver='sgd'.
        
    nesterovs_momentum : bool, default=True
        Whether to use Nesterov's momentum. Only used when solver='sgd' and
        momentum > 0.
        
    early_stopping : bool, default=False
        Whether to use early stopping to terminate training when validation
        score is not improving. If set to true, it will automatically set
        aside 10% of training data as validation and terminate training when
        validation score is not improving by at least tol for
        ``n_iter_no_change`` consecutive epochs. The split is stratified,
        except in a multilabel setting.
        If early stopping is False, then the training stops when the training
        loss does not improve by more than tol for n_iter_no_change consecutive
        passes over the training set.
        Only effective when solver='sgd' or 'adam'
        
    validation_fraction : float, default=0.1
        The proportion of training data to set aside as validation set for
        early stopping. Must be between 0 and 1.
        Only used if early_stopping is True
        
    beta_1 : float, default=0.9
        Exponential decay rate for estimates of first moment vector in adam,
        should be in [0, 1). Only used when solver='adam'
        
    beta_2 : float, default=0.999
        Exponential decay rate for estimates of second moment vector in adam,
        should be in [0, 1). Only used when solver='adam'
        
    epsilon : float, default=1e-8
        Value for numerical stability in adam. Only used when solver='adam'
        
    n_iter_no_change : int, default=10
        Maximum number of epochs to not meet ``tol`` improvement.
        Only effective when solver='sgd' or 'adam'
        .. versionadded:: 0.20
        
    max_fun : int, default=15000
        Only used when solver='lbfgs'. Maximum number of loss function calls.
        The solver iterates until convergence (determined by 'tol'), number
        of iterations reaches max_iter, or this number of loss function calls.
        Note that number of loss function calls will be greater than or equal
        to the number of iterations for the `MLPClassifier`.
        .. versionadded:: 0.22

### 1.2.3 Divisão em treino e teste

In [None]:
# Fraction of the samples used for training
treino = 0.8
# Remaining fraction; passed as `test_size` to train_test_split
teste = 1 - treino

In [None]:
from sklearn.model_selection import train_test_split

def divisao_treino_teste(random_state=None):
    """Split the raw features and their PCA projection into train/test sets.

    Both matrices are partitioned in a single ``train_test_split`` call, so
    the raw and the PCA sets contain exactly the same samples. Two independent
    random splits would compare the classifiers on different test images.

    Reads the module-level ``features``, ``transform``, ``labels`` and
    ``teste`` and prints the resulting train/test sizes.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``train_test_split``; pass an int for a reproducible split.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test,
        X_train_pca, X_test_pca, y_train_pca, y_test_pca)``.
        The ``*_pca`` labels are the same arrays as ``y_train`` / ``y_test``.
    """
    (X_train, X_test,
     X_train_pca, X_test_pca,
     y_train, y_test) = train_test_split(
        features, transform, labels, test_size=teste, random_state=random_state
    )

    print('Imagens treino = ', len(X_train))
    print('Imagens teste = ', len(X_test))

    # Same partition for both representations, hence the shared label arrays
    return X_train, X_test, y_train, y_test, X_train_pca, X_test_pca, y_train, y_test

In [None]:
# Split the raw features and their PCA projection into train and test sets
X_train, X_test, y_train, y_test, X_train_pca, X_test_pca, y_train_pca, y_test_pca = divisao_treino_teste()

### 1.2.4 Executando classificador

In [None]:
# Hyper-parameter selection
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

mlp = MLPClassifier(random_state=1)

# Grid explored exhaustively (every combination is evaluated)
parametros = {
    'hidden_layer_sizes': [(1,), (100,), (100,100), (100,100,100)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'batch_size': [8, 32],
    'learning_rate_init': [0.001, 0.5],
    'learning_rate': ['constant','adaptive'],
    'max_iter': [10, 200],
}

# 3-fold cross-validation on the training split, ranked by accuracy
clf = GridSearchCV(mlp, parametros, n_jobs=-1, scoring = 'accuracy', cv=3)
clf.fit(X_train, y_train)

# Best parameter set.
# best_score_ is the mean cross-validated *accuracy* (scoring='accuracy'), so
# it is labelled as such to avoid confusion with the Precision metric below.
print('Melhores parâmetros:\n', clf.best_params_)
print('Melhor acurácia (validação cruzada):\n', clf.best_score_)
print("\n")

# All results: mean accuracy and two standard deviations per combination
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

# Evaluate the best estimator found by the search on the held-out test split
y_true, y_pred = y_test, clf.predict(X_test)
print("\n\nMétricas: [Acurácia, Precision, Recall, F1-score, Kappa]")
print("Métricas: ", calculo_metricas(y_true, y_pred))

In [None]:
# Read the winning grid-search settings and build a fresh MLP from them
melhores = clf.best_params_
activation, alpha, batch_size, hidden_layer_sizes = (
    melhores.get(chave)
    for chave in ('activation', 'alpha', 'batch_size', 'hidden_layer_sizes')
)
learning_rate, learning_rate_init, max_iter, solver = (
    melhores.get(chave)
    for chave in ('learning_rate', 'learning_rate_init', 'max_iter', 'solver')
)

# Same fixed seed as the estimator used during the search
mlp = MLPClassifier(
    random_state=1,
    solver=solver,
    max_iter=max_iter,
    learning_rate_init=learning_rate_init,
    learning_rate=learning_rate,
    hidden_layer_sizes=hidden_layer_sizes,
    batch_size=batch_size,
    alpha=alpha,
    activation=activation,
)

In [None]:
# Execução dos classificadores
def executa_classificadores_all_features(X_train, X_test, y_train, y_test):
    """Fit the module-level ``mlp`` on the training split and report test metrics.

    Prints a header, trains ``mlp`` on ``(X_train, y_train)``, predicts
    ``X_test`` and prints the list returned by ``calculo_metricas``.
    Returns nothing; ``mlp`` is left fitted on the given training data.
    """
    cabecalho = (
        "Métricas: [Acurácia, Precision, Recall, F1-score, Kappa]",
        "====== Utilizando todas as features ======",
    )
    for linha in cabecalho:
        print(linha)

    mlp.fit(X_train, y_train)
    previsoes = mlp.predict(X_test)
    print("Métricas: ", calculo_metricas(y_test, previsoes))

In [None]:
# Train the tuned MLP on the training split and report metrics on the test split
executa_classificadores_all_features(X_train, X_test, y_train, y_test)

In [None]:
# Number of leading principal components fed to the classifier
n_components = 5

print("Métricas: [Acurácia, Precision, Recall, F1-score, Kappa]\n\n")
# Header text fixed: the word "utilizando" was printed twice
print("=== Utilizando {} componentes PCA ===\n".format(n_components))

# Refit the shared `mlp` on the first n_components columns of the PCA split
mlp.fit(X_train_pca[:,0:n_components], y_train_pca)
y_true, y_pred = y_test_pca, mlp.predict(X_test_pca[:,0:n_components])
print("Métricas: ", calculo_metricas(y_true, y_pred))

#### 1.2.4.1 Média e desvio padrão

In [None]:
# Repeated hold-out evaluation on all features: n random splits, then the
# mean and standard deviation of each metric.
n = 10
acc, pre, rec, f1, k = [], [], [], [], []

for i in range(n):
    clear_output(wait=True)
    print('Iteração {} de {}.'.format(i+1,n))

    # New split per repetition. The repetition index is used as the seed so
    # each run draws a different partition while the experiment as a whole is
    # reproducible (unseeded splits gave different numbers on every run).
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=teste, random_state=i
    )

    # MLP (refits the shared, tuned estimator)
    mlp.fit(X_train, y_train)
    y_true, y_pred = y_test, mlp.predict(X_test)

    # calculo_metricas returns [accuracy, precision, recall, f1, kappa]
    metricas = calculo_metricas(y_true, y_pred)
    for historico, valor in zip((acc, pre, rec, f1, k), metricas):
        historico.append(valor)

print("\n=== Utilizando todas as features na MLP ===")
for nome, valores in (("Acurácia", acc), ("Precision", pre), ("Recall", rec),
                      ("F1-score", f1), ("Kappa", k)):
    print("Mean {}: {}+-{}".format(nome, np.mean(valores), np.std(valores)))

In [None]:
# Repeated hold-out evaluation on the leading PCA components: n random splits,
# then the mean and standard deviation of each metric.
n = 10
n_components = 10
acc_pca, pre_pca, rec_pca, f1_pca, k_pca = [], [], [], [], []

for i in range(n):
    clear_output(wait=True)
    print('Iteração {} de {}.'.format(i+1,n))

    # New split per repetition, seeded with the repetition index so the
    # experiment is reproducible.
    X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(
        transform[:,0:n_components], labels, test_size=teste, random_state=i
    )

    # MLP (refits the shared, tuned estimator)
    mlp.fit(X_train_pca, y_train_pca)
    y_true, y_pred = y_test_pca, mlp.predict(X_test_pca)

    # calculo_metricas returns [accuracy, precision, recall, f1, kappa]
    metricas = calculo_metricas(y_true, y_pred)
    for historico, valor in zip((acc_pca, pre_pca, rec_pca, f1_pca, k_pca), metricas):
        historico.append(valor)

# The classifier evaluated here is the MLP; the header previously said "XGBooster".
print("\n=== Utilizando {} componentes PCA na MLP ===".format(n_components))
for nome, valores in (("Acurácia", acc_pca), ("Precision", pre_pca), ("Recall", rec_pca),
                      ("F1-score", f1_pca), ("Kappa", k_pca)):
    print("Mean {}: {}+-{}".format(nome, np.mean(valores), np.std(valores)))

# 2. Extraindo características com o descritor HOG

In [None]:
from skimage.feature import hog
# Import `resize` directly: `from skimage import feature, transform` would
# rebind the name `transform`, which already holds the PCA-projected matrix.
from skimage.transform import resize

# HOG configuration
HOG_IMAGE_SHAPE = (330, 330)
HOG_ORIENTATIONS = 8
HOG_PIXELS_PER_CELL = (4, 4)
HOG_CELLS_PER_BLOCK = (1, 1)

# With 1x1 blocks the descriptor holds one histogram per cell:
# (330 // 4) * (330 // 4) * 8 = 53792 values.
n_hog = ((HOG_IMAGE_SHAPE[0] // HOG_PIXELS_PER_CELL[0])
         * (HOG_IMAGE_SHAPE[1] // HOG_PIXELS_PER_CELL[1])
         * HOG_ORIENTATIONS)

# NOTE: this rebinds `features`, replacing the GLCM matrix built earlier.
features = np.zeros((len(labels), n_hog))
start = time.time()

for id_im, imagem in enumerate(imagens):
    clear_output(wait=True)
    print('Extraindo atributos: {} imagens processadas de {}.'.format(id_im+1, len(imagens)))

    # Grayscale + fixed size so every image yields a descriptor of equal length
    grayim = rgb2gray(imagem)
    grayim = resize(grayim, HOG_IMAGE_SHAPE)

    fd = hog(grayim,
             orientations=HOG_ORIENTATIONS,
             pixels_per_cell=HOG_PIXELS_PER_CELL,
             cells_per_block=HOG_CELLS_PER_BLOCK,
             transform_sqrt=True)
    features[id_im,:] = fd

end = time.time()
print('time = ', end - start)

In [None]:
# Sanity check: (number of images, HOG descriptor length)
features.shape

### 2.1 Visualização dos dados

In [None]:
# Scatter of two individual HOG descriptor entries (columns 1 and 2), coloured by class
fig = plt.figure(figsize=(10,6))
plt.scatter(features[:,1],features[:,2], c=labels)
plt.xlabel('Atributo HOG 1')
plt.ylabel('Atributo HOG 2');

### 2.2 Divisão em treino e teste

In [None]:
# Hold-out split of the current `features` matrix; `teste` is the test fraction
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=teste)

### 2.3 Execução dos classificadores

In [None]:
# Fit the shared MLP on the new training split and report test metrics.
# NOTE(review): `mlp` keeps the hyper-parameters chosen by the grid search run
# earlier on different features — confirm they are appropriate here.
executa_classificadores_all_features(X_train, X_test, y_train, y_test)