In [15]:
#Ass Mateus e Yan

# IMPORTAÇÕES
import numpy as np
import os, cv2, random
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 
import tensorflow as tf

from tqdm import tqdm
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import sklearn.metrics
from sklearn.metrics import classification_report
from sklearn.model_selection import  cross_val_score
from sklearn.preprocessing import StandardScaler 


from skimage.feature import greycomatrix, greycoprops
from skimage.feature import hog

In [16]:
# DEFINIÇÕES


TRAIN_DIR = 'kaggle/train/'

num_img = 1000                      # Opção do número de imagens a serem usadas
n_classes = 2                     # A base de dados DogsAndCats tem 2 classes de objetos!!!!
nepochs = 10                      # Numero de epocas para o treinamento!!!
batch_size = 32                   # Numero de imagens por batch!!!
image_size = 128                   # Todas as imagens devem ser redimensionadas para 32x32 pixels!!!
nchannels = 3                     # Numero de canais de cores na imagem!!!
n_input = image_size * image_size * nchannels # Tamanho da entrada!
learning_rate = 1e-3              # Taxa de aprendizado!!!
kprob = 0.5                       # Probabilidade para dropout!!!
test_size = 0.25                  # Porcentagem que sobra pra teste
random_state = 42                 # Semente de aleatoriedade

In [17]:
#FUNÇÕES AUXILIARES


# Retorna a imagem apontada pelo endereço do arquivo redimensionada para o tamnho correto
# Temos a opção de ler a imagem colorida ou em escala de cinza
def read_image(file_path):
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
#    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    return cv2.resize(img, (image_size, image_size), interpolation=cv2.INTER_CUBIC)


# Preparação dos Dados
# Normaliza e passa as imagens no formato de um vetor de dados de tipo dtype
def prep_data(images):
    count = len(images)
    data = np.ndarray((count, nchannels, image_size, image_size), dtype = np.float32)

    for i, image_file in enumerate(images):
        image = read_image(image_file)
        
        #primeira forma de normalização
        #min_val = np.min(image)
        #max_val = np.max(image )
        #image = (image-min_val)/(max_val-min_val)
        #segunda forma de normalização
        image = image/255.0    
        
        data[i] = image.T
        if i%250 == 0: print('Processed {} of {}'.format(i, count))    
    return data

        
#---------------------------------------------

def create_label(image_name):
    word_label = image_name.split('.')[-3]
    if word_label == 'cat':
        return np.array([1,0])
    elif word_label == 'dog':
        return np.array([0,1])
    else: 
        print ("Esta classe não existe!!!!!")


def read_dataset (filename):
    X = []
    Y = []
    for img in tqdm(os.listdir(filename)):
        path = os.path.join(filename, img)
        img_data = cv2.imread(path)
        #img_data = cv2.imread(path, cv2.IMREAD_GRAYSCALE) #converte para níveis de cinza
        img_data = cv2.resize(img_data, (image_size, image_size)) #deixa com as dimensões definidas
        
        #primeira forma de normalização
        #min_val = np.min(img_data)
        #max_val = np.max(img_data )
        #img_data = (img_data-min_val)/(max_val-min_val)
        #segunda forma de normalização
        img_data = img_data/255.0    
        
        #cria os vetores de dados e de labels
        X.append(np.array(img_data))
        Y.append(np.array(create_label(img)))
        
    return X,Y



def next_batch (num, data, labels_n):
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = data[idx]
    labels_shuffle = labels_n[idx]
    return data_shuffle, labels_shuffle


# Printa a imagem de um gato e de um cachorro no indice apontado
def show_cats_and_dogs(idx):
    cat = read_image(train_cats[idx])
    dog = read_image(train_dogs[idx])
    pair = np.concatenate((cat, dog), axis=1)
    plt.figure(figsize=(10,5))
    plt.imshow(pair)
    plt.show()

In [18]:
#Dois exemplos de descritores dos professores

# Nesse a imagem é redimensionada e achatada a imagem num vetor de características
def image_to_feature_vector(image, size=(32, 32)):
    return cv2.resize(image, size).flatten()

# Neste segundo é extraído um histograma do HSV e é achatado
def extract_color_histogram(image, bins=(8, 8, 8)):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
        [0, 180, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()

In [19]:
#NOSSOS DESCRITORES

#CNN igual ao do slide
def cnn (x, prob):
    
    input_layer = tf.reshape(x, shape = [-1, image_size, image_size, nchannels])
    
    # Primeira camada de convolucao:
    conv1 = tf.layers.conv2d (
                inputs=input_layer,
                filters=32,
                kernel_size=[3, 3],
                padding="same",
                activation=tf.nn.relu
            )
    
    # Primeira camada de pooling:
    pool1 = tf.layers.max_pooling2d (
                inputs=conv1, 
                pool_size=[2, 2], 
                strides=2
            )
    
    # Segunda camada de convolucao:
    conv2 = tf.layers.conv2d (
                inputs=pool1,
                filters=64,
                kernel_size=[5, 5],
                padding="same",
                activation=tf.nn.relu
            )
    
    # Segunda camada de pooling:
    pool2 = tf.layers.max_pooling2d (
                inputs=conv2, 
                pool_size=[2, 2], 
                strides=2
            )
    
    flat = tf.contrib.layers.flatten(pool2)
    
    # Aqui é descomentado quando se quer retornar apenas o descritor
#    return flat
    
    dense = tf.contrib.layers.fully_connected(
                inputs = flat,
                num_outputs = 128,
                activation_fn = tf.nn.relu
            )
    
    dropout = tf.nn.dropout(dense, prob)
    
    out = tf.contrib.layers.fully_connected(
                inputs = dropout,
                num_outputs = n_classes,
                activation_fn = None
            )
    
    # Aqui retornamos o classificador em cnn
    return out


# MLP da lista de exercícios
def mlp (x):
    # Aqui é definido o número de neurônios em cada camada
    n_camada_1 = 256
    n_camada_2 = 128
   
    # Primeira camada da rede:
    W1 = tf.get_variable('w1', [n_input, n_camada_1], initializer = tf.random_normal_initializer())
    b1 = tf.get_variable('b1', [n_camada_1], initializer = tf.random_normal_initializer())
    y1 = tf.nn.sigmoid(tf.matmul(x, W1) + b1) #tf.nn.relu() or tf.matmul(x, W1) + b1!!!! 

    # Segunda camada da rede:
    W2 = tf.get_variable('w2', [n_camada_1, n_camada_2], initializer = tf.random_normal_initializer())
    b2 = tf.get_variable('b2', [n_camada_2], initializer = tf.random_normal_initializer())
    y2 = tf.nn.sigmoid(tf.matmul(y1, W2) + b2) 

    # Ultima camada da rede:
    W3 = tf.get_variable('w3',[n_camada_2, n_classes], initializer = tf.random_normal_initializer())
    b3 = tf.get_variable('b3',[n_classes], initializer = tf.random_normal_initializer())
    out_layer = tf.matmul(y2, W3) + b3 

    return out_layer

In [20]:
#MONTANDO A BASE DE TREINAMENTO

train_images = [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR)] #full dataset: dogs and cats
train_dogs =   [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR) if 'dog' in i]
train_cats =   [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR) if 'cat' in i]

# considera apenas num_img imagens. Para o dataset completo, desconsiderar.
train_images = train_dogs[:num_img] + train_cats[:num_img]
random.shuffle(train_images)

# Leitura das imagens
train = prep_data(train_images)
print("Train shape: {}".format(train.shape))

# Cria os labels (rótulos)
labels = []
labels_n = []
for i in train_images:
    if 'dog' in i:
        labels.append(1)
        labels_n.append(np.array([0,1]))
    else:
        labels.append(0)
        labels_n.append(np.array([1,0]))


Processed 0 of 2000
Processed 250 of 2000
Processed 500 of 2000
Processed 750 of 2000
Processed 1000 of 2000
Processed 1250 of 2000
Processed 1500 of 2000
Processed 1750 of 2000
Train shape: (2000, 3, 128, 128)


In [7]:
#Apenas mostra algumas imagens do conjunto de treinamento


#for idx in range(0,3):
 #   show_cats_and_dogs(idx)

In [21]:
#Aqui passa cada imagem pelos descritores e salva nos vetores abaixo

rawImages = []
descHist = []

count = len(train_images)

for i, image_file in enumerate(train_images):
    image = read_image(image_file)
    pixels = image_to_feature_vector(image)
    histogram = extract_color_histogram(image)
    
    rawImages.append(pixels)
    descHist.append(histogram)
        
    if i%250 == 0: print('Processed {} of {}'.format(i, count))

        


Processed 0 of 2000
Processed 250 of 2000
Processed 500 of 2000
Processed 750 of 2000
Processed 1000 of 2000
Processed 1250 of 2000
Processed 1500 of 2000
Processed 1750 of 2000


In [9]:
#Avalia o primeiro descritor: as imagens raw
#Usa KNN, Arvore e Gaussian

(X_train, X_test, y_train, y_test) = train_test_split(
    rawImages, labels, test_size=0.25, random_state=42)

classifiers = [
    KNeighborsClassifier(17),    
    DecisionTreeClassifier(),
    GaussianNB()]

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(X_test)
    acc = clf.score(X_test, y_test)
    print("accuracy: {:.2f}%".format(acc * 100))    

KNeighborsClassifier
****Results****
accuracy: 57.80%
DecisionTreeClassifier
****Results****
accuracy: 55.60%
GaussianNB
****Results****
accuracy: 54.40%


In [10]:
#Avalia o segundo descritor: color histogram
#Novamente com os três classificadores

(X_train, X_test, y_train, y_test) = train_test_split(
    descHist, labels, test_size=0.25, random_state=42)

classifiers = [
    KNeighborsClassifier(17),    
    DecisionTreeClassifier(),
    GaussianNB()]

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(X_test)
    acc = clf.score(X_test, y_test)
    print("accuracy: {:.2f}%".format(acc * 100))   

KNeighborsClassifier
****Results****
accuracy: 60.20%
DecisionTreeClassifier
****Results****
accuracy: 61.80%
GaussianNB
****Results****
accuracy: 57.60%


In [11]:
#Avalia a combinação dos dois primeiros descritores!

#ATENÇÃO: ESTE É APENAS UM CÓDIGO EXEMPLO. VOCÊ DEVE DESENVOLVER
#DESCRITORES MAIS ROBUSTOS, BEM COMO EXPLORAR MELHOR AS MÉTRICAS
#DE AVALIAÇÃO (MATRIZ DE CONFUSÃO, ETC)

trainAux = np.hstack((descHist, rawImages))
(X_train, X_test, y_train, y_test) = train_test_split(
    trainAux, labels, test_size=0.25, random_state=42)

classifiers = [
    KNeighborsClassifier(17),
    DecisionTreeClassifier(),
    GaussianNB()]

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(X_test)
    acc = clf.score(X_test, y_test)
    print("accuracy: {:.2f}%".format(acc * 100))   

KNeighborsClassifier
****Results****
accuracy: 57.80%
DecisionTreeClassifier
****Results****
accuracy: 56.80%
GaussianNB
****Results****
accuracy: 59.80%


In [22]:
#----------------------------------------------------

# Leitura da base de dados para as redes neurais:

(X_train, X_test, Y_train, Y_test) = train_test_split(
    train, labels_n, test_size=0.25, random_state=42)

#X_train,Y_train = read_dataset (TRAIN_DIR)
X_train = np.asarray(X_train).reshape(-1, n_input)
Y_train = np.asarray(Y_train)

#X_test,Y_test = read_dataset (TEST_DIR)
X_test = np.asarray(X_test).reshape(-1, n_input)
Y_test = np.asarray(Y_test)

#terceira forma de normalização
#ATENÇÃO: para testar, comente todas as formas de normalização da função read_dataset()
#scaler = StandardScaler()  
#scaler.fit(X_train)  
#X_train = scaler.transform(X_train)  
#X_test = scaler.transform(X_test)  

# Variáveis do tensorflow:
Y = tf.placeholder(tf.float32, [None, n_classes])
X = tf.placeholder(tf.float32, [None, image_size * image_size * nchannels])
prob = tf.placeholder(tf.float32, name='keep_prob')



# ESCOLHER DESCRITOR DE REDE
#Ypred = mlp (X) 
Ypred = cnn (X, kprob)

# Funções de custo:
error1 = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(tf.nn.softmax(Ypred)), reduction_indices = [1]))
error2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = Ypred, labels = Y)) 
error3 = tf.reduce_mean(tf.reduce_sum(tf.square(tf.nn.softmax(Ypred) - Y), reduction_indices = [1]))
error = error2
    
# Funções para minimização de erro: 
optimizer1 = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(error)
optimizer2 = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(error)
optimizer = optimizer2
 
corr = tf.equal(tf.argmax(Ypred,1),tf.argmax(Y,1))
 
accuracy = tf.reduce_mean(tf.cast(corr,tf.float32))

In [24]:
# Treina e Avalia os descritores e classidicadores de Redes


# Inicialização de variáveis:
init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
        
    # Treino:
    for epoch in range(nepochs):
        train_err = 0
        train_acc = 0
        train_batches = 0
        total_batch = int(len(X_train)/batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = next_batch (batch_size, X_train, Y_train)
            sess.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys})
            err, acc = sess.run([error,accuracy], feed_dict={X: batch_xs, Y: batch_ys, prob: kprob})
            train_err += err
            train_acc += acc
            train_batches += 1
        print("Epoch: ", '%2d' % (epoch+1))
        print("  training loss:\t\t{:.6f}".format(train_err/train_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(train_acc/train_batches * 100))
        
    # Testes:
    test_err = 0
    test_acc = 0
    test_batches = 0
    total_batch = int(len(X_test)/batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = next_batch (batch_size, X_test, Y_test)
        sess.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys})
        err, acc = sess.run([error,accuracy], feed_dict={X: batch_xs, Y: batch_ys, prob: kprob})
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err/test_batches))
    print("  test accuracy:\t\t{:.2f} %".format((test_acc/test_batches)*100))



('Epoch: ', ' 1')
  training loss:		0.848206
  validation accuracy:		52.92 %
('Epoch: ', ' 2')
  training loss:		0.690303
  validation accuracy:		55.98 %
('Epoch: ', ' 3')
  training loss:		0.689215
  validation accuracy:		55.16 %
('Epoch: ', ' 4')
  training loss:		0.643687
  validation accuracy:		61.35 %
('Epoch: ', ' 5')
  training loss:		0.612530
  validation accuracy:		64.81 %
('Epoch: ', ' 6')
  training loss:		0.578339
  validation accuracy:		68.75 %
('Epoch: ', ' 7')
  training loss:		0.518985
  validation accuracy:		74.52 %
('Epoch: ', ' 8')
  training loss:		0.455114
  validation accuracy:		79.14 %
('Epoch: ', ' 9')
  training loss:		0.414634
  validation accuracy:		80.50 %
('Epoch: ', '10')
  training loss:		0.384577
  validation accuracy:		82.81 %
Final results:
  test loss:			0.653212
  test accuracy:		63.96 %
