# <p style="text-align: center;">Aula Prática 06</p>

## Objetivos desta aula:

    - Extração de Características:
        - LBP
        - GLCM
    - Classificação:
        - Árvore
        - SVM


## Professor: Jefersson dos Santos - jefersson@dcc.ufmg.br
### Monitor: Caio Cesar - caiosilva@ufmg.br

Importações necessárias

In [None]:
from skimage.transform import rotate
from skimage.feature import local_binary_pattern, greycomatrix, greycoprops
from skimage import data
from skimage.color import label2rgb
import matplotlib.pyplot as plt
import numpy as np
from __future__ import print_function


%matplotlib inline

# Extração de características:

## LBP

### Conceito

In [None]:
# LBP variant used by every LBP cell in this notebook; it is passed to
# local_binary_pattern as the last positional argument.
METHOD = 'uniform'
# Use a small font for all matplotlib figures drawn after this point.
plt.rcParams['font.size'] = 9


def plot_circle(ax, center, radius, color):
    """Add a filled circle to ``ax``.

    The circle is centred at ``center`` with the given ``radius``, filled
    with ``color`` and outlined in mid-gray ('0.5'). Returns None.
    """
    ax.add_patch(
        plt.Circle(center, radius, facecolor=color, edgecolor='0.5')
    )


def plot_lbp_model(ax, binary_values):
    """Draw the schematic of a local binary pattern on ``ax``.

    ``binary_values`` holds one value per neighbour; each value is converted
    with ``str`` and used as the fill colour of the corresponding neighbour
    circle. Neighbours are placed on a circle of radius 1 around the central
    pixel, 45 degrees apart, starting at angle 0.
    """
    # Geometry of the schematic.
    angle_step = np.deg2rad(45)
    ring_radius = 1
    dot_radius = 0.15
    half_grid = 1.5
    gray = '0.5'

    # Central pixel.
    plot_circle(ax, (0, 0), radius=dot_radius, color=gray)

    # Surrounding pixels, one per entry of binary_values.
    for index, value in enumerate(binary_values):
        angle = index * angle_step
        position = (ring_radius * np.cos(angle), ring_radius * np.sin(angle))
        plot_circle(ax, position, radius=dot_radius, color=str(value))

    # Pixel grid: four evenly spaced lines in each direction.
    for offset in np.linspace(-half_grid, half_grid, 4):
        ax.axvline(offset, color=gray)
        ax.axhline(offset, color=gray)

    # Equal aspect ratio, no axis decorations, small margin around the grid.
    ax.axis('image')
    ax.axis('off')
    limit = half_grid + 0.2
    ax.set_xlim(-limit, limit)
    ax.set_ylim(-limit, limit)


# One schematic per example pattern: eight neighbour values each, where the
# value is used as a grayscale fill colour (0 = black, 1 = white).
titles = ['flat', 'flat', 'edge', 'corner', 'non-uniform']

binary_patterns = [
    np.zeros(8),
    np.ones(8),
    np.concatenate((np.ones(4), np.zeros(4))),
    np.concatenate((np.zeros(3), np.ones(5))),
    [1, 0, 0, 1, 1, 1, 0, 0],
]

fig, axes = plt.subplots(ncols=5, figsize=(7, 2))

for ax, name, values in zip(axes, titles, binary_patterns):
    plot_lbp_model(ax, values)
    ax.set_title(name)

### Histograma

In [None]:
# Settings for LBP: `radius` and `n_points` are passed to
# local_binary_pattern below; n_points is 8 per unit of radius (24 here).
radius = 3
n_points = 8 * radius


def overlay_labels(image, lbp, labels):
    """Overlay on ``image`` the pixels whose LBP code is one of ``labels``.

    A boolean mask is built by OR-ing ``lbp == label`` over every label, and
    the mask is rendered over ``image`` with ``label2rgb`` (background label
    0, alpha 0.5). Returns whatever ``label2rgb`` returns.
    """
    per_label_hits = [lbp == label for label in labels]
    mask = np.logical_or.reduce(per_label_hits)
    return label2rgb(mask, image=image, bg_label=0, alpha=0.5)


def highlight_bars(bars, indexes):
    """Colour red ('r') the entries of ``bars`` selected by ``indexes``.

    Each selected bar must provide ``set_facecolor``. Returns None.
    """
    selected = (bars[index] for index in indexes)
    for bar in selected:
        bar.set_facecolor('r')


# Load the brick texture bundled with scikit-image and compute its LBP map.
# `data.load` was removed from newer scikit-image releases in favour of
# per-image accessors, so use `data.brick()` when it exists and fall back to
# the legacy loader on old installations.
_brick_loader = getattr(data, 'brick', None)
if _brick_loader is not None:
    image = _brick_loader()
else:
    image = data.load('brick.png')
lbp = local_binary_pattern(image, n_points, radius, METHOD)


def hist(ax, lbp):
    """Draw the normalized histogram of the LBP codes in ``lbp`` on ``ax``.

    One unit-width bin is used per integer code from 0 up to ``lbp.max()``.
    Returns the value of ``ax.hist`` unchanged (callers in this notebook
    unpack it as ``counts, edges, bars``).
    """
    n_bins = int(lbp.max() + 1)
    # `density=True` is the supported spelling of the old `normed=True`
    # keyword, which recent matplotlib releases no longer accept; for these
    # equal-width bins both produce the same normalization.
    return ax.hist(lbp.ravel(), density=True, bins=n_bins, range=(0, n_bins),
                   facecolor='0.5')


# Figure layout: top row shows the image with the selected LBP codes
# overlaid, bottom row shows the LBP histogram with those codes in red.
fig, (ax_img, ax_hist) = plt.subplots(nrows=2, ncols=3, figsize=(9, 6))
plt.gray()

titles = ('edge', 'flat', 'corner')

# Half-width (in histogram bins) of each highlighted group of codes.
w = width = radius - 1

# Codes around the middle of the histogram.
edge_labels = range(n_points // 2 - w, n_points // 2 + w + 1)

# Codes at both ends of the histogram.
flat_labels = list(range(0, w + 1)) + list(range(n_points - w, n_points + 2))

# Codes around the 1/4 and 3/4 positions of the histogram.
i_14 = n_points // 4
i_34 = 3 * (n_points // 4)
corner_labels = (list(range(i_14 - w, i_14 + w + 1)) +
                 list(range(i_34 - w, i_34 + w + 1)))

label_sets = (edge_labels, flat_labels, corner_labels)

for image_ax, hist_ax, labels, name in zip(ax_img, ax_hist, label_sets,
                                           titles):
    # Top row: overlay without axis decorations.
    image_ax.imshow(overlay_labels(image, lbp, labels))
    image_ax.axis('off')

    # Bottom row: histogram with the selected bins highlighted. The last bin
    # is excluded when choosing the y-limit.
    counts, _, bars = hist(hist_ax, lbp)
    highlight_bars(bars, labels)
    hist_ax.set_ylim(ymax=np.max(counts[:-1]))
    hist_ax.set_xlim(xmax=n_points + 2)
    hist_ax.set_title(name)

ax_hist[0].set_ylabel('Percentage')

### Comparativo

In [None]:
# Settings for LBP used by `match` and the reference maps below:
# n_points is 8 per unit of radius (16 here).
radius = 2
n_points = 8 * radius


def kullback_leibler_divergence(p, q):
    """Return the base-2 Kullback-Leibler divergence of ``p`` from ``q``.

    ``p`` and ``q`` are array-likes of equal shape. Positions where either
    value is zero are left out of the sum.
    """
    p_arr, q_arr = np.asarray(p), np.asarray(q)
    both_nonzero = (p_arr != 0) & (q_arr != 0)
    p_sel = p_arr[both_nonzero]
    q_sel = q_arr[both_nonzero]
    return np.sum(p_sel * np.log2(p_sel / q_sel))


def match(refs, img):
    """Return the name of the reference whose LBP histogram best matches ``img``.

    ``refs`` maps a name to a precomputed LBP map. The LBP map of ``img`` is
    computed with the module-level ``n_points``, ``radius`` and ``METHOD``,
    and its normalized histogram is compared with each reference histogram
    using ``kullback_leibler_divergence``; the lowest score wins.

    Returns None when no reference scores below the initial threshold of 10.
    """
    best_score = 10
    best_name = None
    lbp = local_binary_pattern(img, n_points, radius, METHOD)
    n_bins = int(lbp.max() + 1)
    # `density=True` replaces np.histogram's `normed=True`, which recent
    # NumPy releases no longer accept; with equal-width bins the result is
    # the same. The local is named img_hist so that it does not shadow the
    # notebook's `hist` plotting helper.
    img_hist, _ = np.histogram(lbp, density=True, bins=n_bins,
                               range=(0, n_bins))
    for name, ref in refs.items():
        # Reference histograms use the bin layout derived from `img`.
        ref_hist, _ = np.histogram(ref, density=True, bins=n_bins,
                                   range=(0, n_bins))
        score = kullback_leibler_divergence(img_hist, ref_hist)
        if score < best_score:
            best_score = score
            best_name = name
    return best_name


def _load_texture(accessor_name, legacy_filename):
    """Return a texture bundled with scikit-image.

    Newer scikit-image releases expose bundled images through per-image
    accessors (e.g. ``data.brick()``) and no longer provide ``data.load``.
    The accessor is used when present; otherwise the legacy
    ``data.load(legacy_filename)`` call is used.
    """
    accessor = getattr(data, accessor_name, None)
    if accessor is not None:
        return accessor()
    return data.load(legacy_filename)


brick = _load_texture('brick', 'brick.png')
grass = _load_texture('grass', 'grass.png')
# NOTE(review): there is no per-image accessor for 'rough-wall.png'; the
# upstream LBP example switched to `data.gravel()` as its third texture, so
# that is used when available. Confirm this substitution is acceptable.
wall = _load_texture('gravel', 'rough-wall.png')

# Reference LBP maps, keyed by the texture name that `match` reports.
refs = {
    'brick': local_binary_pattern(brick, n_points, radius, METHOD),
    'grass': local_binary_pattern(grass, n_points, radius, METHOD),
    'wall': local_binary_pattern(wall, n_points, radius, METHOD)
}

# Classify rotated copies of the textures against the reference LBP maps.
print('Rotated images matched against references using LBP:')
rotation_cases = (
    ('original: brick, rotated: 30deg, match result: ', brick, 30),
    ('original: brick, rotated: 70deg, match result: ', brick, 70),
    ('original: grass, rotated: 145deg, match result: ', grass, 145),
)
for message, texture, angle in rotation_cases:
    rotated = rotate(texture, angle=angle, resize=False)
    print(message, match(refs, rotated))

# Top row: the three textures; bottom row: the histogram of each texture's
# reference LBP map.
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(nrows=2, ncols=3,
                                                       figsize=(9, 6))
plt.gray()

columns = (
    (ax1, ax4, brick, 'brick'),
    (ax2, ax5, grass, 'grass'),
    (ax3, ax6, wall, 'wall'),
)
for image_ax, hist_ax, texture, key in columns:
    image_ax.imshow(texture)
    image_ax.axis('off')
    hist(hist_ax, refs[key])

# Axis labels only on the left-most and middle histograms.
ax4.set_ylabel('Percentage')
ax5.set_xlabel('Uniform LBP values')

plt.show()

## GLCM (Haralick Features)

In [None]:



# Side length, in pixels, of the square patches cut from the image.
PATCH_SIZE = 21

# Open the camera image.
image = data.camera()

# (row, column) of the top-left corner of each patch taken from grassy areas.
grass_locations = [(474, 291), (440, 433), (466, 18), (462, 236)]
grass_patches = [image[row:row + PATCH_SIZE, col:col + PATCH_SIZE]
                 for row, col in grass_locations]

# (row, column) of the top-left corner of each patch taken from sky areas.
sky_locations = [(54, 48), (21, 233), (90, 380), (195, 330)]
sky_patches = [image[row:row + PATCH_SIZE, col:col + PATCH_SIZE]
               for row, col in sky_locations]

# One GLCM per patch (distance 5, angle 0, 256 levels), grass patches first;
# xs holds the dissimilarity and ys the correlation of each patch.
glcms = [greycomatrix(patch, [5], [0], 256, symmetric=True, normed=True)
         for patch in grass_patches + sky_patches]
xs = [greycoprops(matrix, 'dissimilarity')[0, 0] for matrix in glcms]
ys = [greycoprops(matrix, 'correlation')[0, 0] for matrix in glcms]

# Figure: original image and feature scatter on the first row, grass patches
# on the second row, sky patches on the third.
fig = plt.figure(figsize=(8, 8))

# Original image with a square marker at the centre of every patch
# (green for grass, blue for sky).
ax = fig.add_subplot(3, 2, 1)
ax.imshow(image, cmap=plt.cm.gray, interpolation='nearest',
          vmin=0, vmax=255)
for locations, marker in ((grass_locations, 'gs'), (sky_locations, 'bs')):
    for (y, x) in locations:
        ax.plot(x + PATCH_SIZE / 2, y + PATCH_SIZE / 2, marker)
ax.set_xlabel('Original Image')
ax.set_xticks([])
ax.set_yticks([])
ax.axis('image')

# Scatter of (dissimilarity, correlation); the grass patches come first in
# xs / ys.
n_grass = len(grass_patches)
ax = fig.add_subplot(3, 2, 2)
ax.plot(xs[:n_grass], ys[:n_grass], 'go', label='Grass')
ax.plot(xs[n_grass:], ys[n_grass:], 'bo', label='Sky')
ax.set_xlabel('GLCM Dissimilarity')
ax.set_ylabel('GLCM Correlation')
ax.legend()

# The image patches themselves: grass on grid row 1, sky on grid row 2
# (zero-based rows of a 3-row grid).
patch_rows = ((grass_patches, 'Grass %d'), (sky_patches, 'Sky %d'))
for row, (patches, caption) in enumerate(patch_rows, start=1):
    for i, patch in enumerate(patches):
        ax = fig.add_subplot(3, len(patches), len(patches) * row + i + 1)
        ax.imshow(patch, cmap=plt.cm.gray, interpolation='nearest',
                  vmin=0, vmax=255)
        ax.set_xlabel(caption % (i + 1))


# Title the figure and show it.
fig.suptitle('Grey level co-occurrence matrix features', fontsize=14)
plt.show()

# Classificação Supervisionada

## Árvore de Decisão

Importações necessárias:

In [None]:
import random
import numpy as np
from sklearn import cross_validation
from sklearn.datasets import load_iris
from sklearn import tree
from sklearn.metrics import accuracy_score # More metrics here: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics

Carregando o conjunto de dados

In [None]:
# Load the Iris dataset. More information here:
# http://en.wikipedia.org/wiki/Iris_flower_data_set
iris = load_iris()

# The dataset has 150 instances equally divided into 3 classes: instances
# 0-49 belong to class 0, 50-99 to class 1 and the remaining ones to class 2.
for item in (iris.feature_names, iris.data):
    print(item)

print(iris.target_names)  # the possible classes
print(iris.target)        # the ground truth

print(type(iris.data))    # type of the variable
# Number of rows x number of columns (150x4 in this case); `shape` is an
# attribute of numpy arrays.
print(iris.data.shape)


Criação do modelo

In [None]:
# Create a decision tree (DT) classifier with default parameters.
# More about the parameters here:
# http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier
clf_1 = tree.DecisionTreeClassifier()

Treinamento

In [None]:
# Fit the decision tree on the whole dataset (no held-out samples).
clf_1.fit(iris.data, iris.target)

Teste

In [None]:
# Predict the class of the first instance only. In the slice, `:1` selects
# the rows up to (not including) row 1, i.e. a single row, and the second
# `:` selects all columns.
print (clf_1.predict(iris.data[:1, :]))

In [None]:
# Classify the whole dataset.
prediction = clf_1.predict(iris.data)
# Accuracy of the prediction against the ground truth. The original author
# notes it is 1 here, because the same samples were used for training.
accuracy = accuracy_score(iris.target, prediction)
print (accuracy)


Validação

In [None]:
# Split the Iris dataset, holding out 40% of the samples for testing.
# NOTE(review): `sklearn.cross_validation` was removed in newer scikit-learn
# releases (`sklearn.model_selection` is its replacement) -- confirm the
# scikit-learn version this notebook targets.
train, test, train_target, test_target = cross_validation.train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

# Create a new decision tree and fit it on the training samples only.
clf_2 = tree.DecisionTreeClassifier()
clf_2.fit(train, train_target)

# Classify the held-out samples and compare against their ground truth.
# Unlike the previous cell, these samples were not seen during training.
prediction_2 = clf_2.predict(test)
accuracy_2 = accuracy_score(test_target, prediction_2)
print(accuracy_2)

## SVM

Importações necessárias:

In [None]:
import random
import numpy as np
from sklearn import cross_validation
from sklearn.datasets import load_iris
from sklearn.datasets import load_digits
from sklearn import grid_search
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import KFold

In [None]:
def Simple_Classifier_Function(train, target, test, test_target, classifier, clf_name):
    """Train a simple classifier (e.g. SVM, DT, ...) and report its accuracy.

    Fits ``classifier`` on ``train`` / ``target``, predicts ``test``, prints
    the accuracy against ``test_target`` labelled with ``clf_name`` and
    returns that accuracy.
    """
    classifier.fit(train, target)

    accuracy = accuracy_score(test_target, classifier.predict(test))

    print('%s Accuracy %.2f' % (clf_name, accuracy))
    return accuracy

def Simple_Classifier_Function_Grid_Search(train, target, classifier, C, gamma):
    """Search for the best ``C`` and ``gamma`` for ``classifier``.

    Only two parameters are evaluated: ``C`` (penalty parameter) and
    ``gamma`` (kernel coefficient), both given as collections of candidate
    values. These are fundamental for the SVM-RBF classifier.

    The search runs with ``n_jobs=6`` and ``verbose=1`` (a higher verbose
    value prints more information). The best score and the best parameters
    are printed and the fitted search object is returned.
    """
    search = grid_search.GridSearchCV(
        classifier,
        param_grid={'C': C, 'gamma': gamma},
        verbose=1,
        n_jobs=6,
    )
    search.fit(train, target)

    for summary in (search.best_score_, search.best_params_):
        print(summary)

    return search

Exemplo Binário

In [None]:
# Tiny hand-made binary problem: six 2-D training points and one test point.
print ('Simple Example')

train = [[0, 0], [1, 2], [1, 0], [0, 2], [0, 1], [1, 2]]
train_target = [0, 1, 1, 0, 0, 1]

test = [[2, 2]]
test_target = [1]

# Train and evaluate both classifiers on the same data.
Simple_Classifier_Function(train, train_target, test, test_target, svm.SVC(), 'SVM-RBF') # SVM-RBF: the original author notes this is the default configuration of svm.SVC()
Simple_Classifier_Function(train, train_target, test, test_target, svm.LinearSVC(),'Linear SVM') # Linear SVM

Exemplo Iris

In [None]:
print('Iris Dataset')

# Load the dataset.
iris = load_iris()

# 5-fold cross-validation (cv parameter) of an SVM-RBF on the Iris dataset.
scores = cross_validation.cross_val_score(svm.SVC(), iris.data, iris.target,
                                          cv=5)
print('SVM-RBF accuracy for 5-fold', scores.mean())

# Manual check: build the folds explicitly and evaluate each one.
print('\nSVM accuracy for each fold')

# Create the 5 folds; iterating over `kf` yields (train, test) index pairs.
# NOTE(review): this is the legacy `sklearn.cross_validation` API, removed in
# newer scikit-learn releases -- confirm the targeted version.
kf = cross_validation.StratifiedKFold(iris.target, n_folds=5)

# Running sums of the per-fold accuracies.
scores_SVMRBF = 0
scores_LinearSVM = 0

for fold, (train, test) in enumerate(kf, start=1):
    print("-------------------> Fold %d" % fold)

    # Use the returned indices to build this fold's train / test sets.
    fold_train = [iris.data[i] for i in train]
    fold_target = [iris.target[i] for i in train]
    fold_train_test = [iris.data[i] for i in test]
    fold_target_test = [iris.target[i] for i in test]

    scores_SVMRBF += Simple_Classifier_Function(
        fold_train, fold_target, fold_train_test, fold_target_test,
        svm.SVC(), 'SVM-RBF')
    scores_LinearSVM += Simple_Classifier_Function(
        fold_train, fold_target, fold_train_test, fold_target_test,
        svm.LinearSVC(), 'Linear SVM')

# Average over the 5 folds.
print('\nFinal accuracy')
print('SVM-RBF accuracy', scores_SVMRBF / 5.0)
print('Linear SVM accuracy', scores_LinearSVM / 5.0)


Exemplo com dígitos

In [None]:
print('Digits Dataset')

# Load the dataset. The original notebook links to
# http://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits
# NOTE(review): scikit-learn documents load_digits as a copy of the UCI
# "Optical Recognition of Handwritten Digits" test set -- confirm the link.
digits = load_digits()

# 5-fold cross-validation (cv parameter) of an SVM-RBF on the digits dataset.
# Per the original author, kernel='rbf' is optional because it is the default.
scores = cross_validation.cross_val_score(
    svm.SVC(kernel='rbf'), digits.data, digits.target, cv=5)
# The original notebook recorded 0.44878 for this run.
print('Default SVM-RBF accuracy for 5-fold', scores.mean())

# Two parameters are fundamental for SVM-RBF: C (penalty parameter) and gamma
# (kernel coefficient). Build a logarithmic grid of candidates for each.
penalty = np.logspace(-2, 10, 13)
print('Penalty parameter', penalty)
coef = np.logspace(-9, 3, 13)
print('Kernel coefficient', coef)

# Grid-search both parameters, then cross-validate an SVM-RBF configured with
# the best combination found.
best_clf = Simple_Classifier_Function_Grid_Search(
    digits.data, digits.target, svm.SVC(kernel='rbf'), C=penalty, gamma=coef)
scores = cross_validation.cross_val_score(
    svm.SVC(kernel='rbf',
            C=best_clf.best_params_['C'],
            gamma=best_clf.best_params_['gamma']),
    digits.data, digits.target, cv=5)
# The original notebook recorded an improved result of 0.9727.
print('Tuned SVM-RBF accuracy for 5-fold', scores.mean())


# Exercício

    Crie um novo notebook Jupyter cujo nome deve seguir a seguinte sintaxe:
    
        lab6-SUAMATRICULA.ipynb
        
    Carregar as imagens do conjunto de dados Texturas.
    Extrair as características (GLCM e LBP) dos dados.
    Aplicar o esquema de validação cruzada na Árvore de Decisão e SVM.
    Responder as seguintes perguntas:
        (1) qual descritor apresentou os melhores resultados? 
        (2) qual abordagem de aprendizado apresentou os melhores resultados?
        