## Importando Bibliotecas

In [38]:
import tensorflow as tf 
from tensorflow import keras
from sklearn import metrics
from keras.models import Sequential
from keras.layers import Input, Dense
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
from keras.optimizers import Adam
from keras.models import Model
import sklearn as sk
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize

## Pre-processamento dos dados
**Carregando os dados de treino e teste:**

In [39]:
train_images = pd.read_csv('../fashion-mnist_train.csv')
test_images = pd.read_csv('../fashion-mnist_test.csv')

**Separando as labels do conjuntos:**

In [40]:
train_labels=train_images.loc[:, ['label']]
train_images=train_images.drop(['label'], axis=1)
test_labels=test_images.loc[:, ['label']]
test_images=test_images.drop(['label'], axis=1)

**Normalizando os data-sets:**

In [41]:
train_images = train_images / 255.0
test_images = test_images / 255.0


----------------------
## Construção do modelo base
**Esta rede será utilizada posteriomente para comparações entre os modelos de dimensionalidade reduzida utilizando PCA e autoencoder.**

**Definição da estrutura do modelo por meio do Keras Sequential:**

In [42]:
model = keras.Sequential([
    keras.layers.Dense(64,activation=tf.nn.relu,input_dim=(784)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(32, activation=tf.nn.softmax)
])

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 64)                50240     
_________________________________________________________________
dense_10 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_11 (Dense)             (None, 32)                4128      
Total params: 62,688
Trainable params: 62,688
Non-trainable params: 0
_________________________________________________________________


**Compilação do modelo:**

In [43]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

**Treino do modelo:**

In [None]:
model.fit(train_images, train_labels, epochs=10, validation_split=0.2, shuffle=True)

**Verificação dos resultados do modelo:**

In [45]:
test_loss, test_acc = model.evaluate(test_images, test_labels)
print("Model - 3 layers - test loss:", test_loss * 100)
print("Model - 3 layers - test accuracy:", test_acc * 100)

Model - 3 layers - test loss: 36.77934731006622
Model - 3 layers - test accuracy: 86.29000186920166


-----------------------------
## Redução de Dimensionalidade Usando Principal component analysis (PCA)

**Preprocessamento de dados (Feature Scaling)**

In [46]:
scaler = StandardScaler()
scaler.fit(train_images)

train_images_r = scaler.transform(train_images)
test_images_r = scaler.transform(test_images)

**Definição do número de componentes:**

In [47]:
pca = PCA(n_components=3)

**Construção dos novos datasets de treino e de teste com dimensões reduzidas usando o PCA:**

In [48]:
pca.fit(train_images_r)

PCA(copy=True, iterated_power='auto', n_components=3, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [49]:
train_images_r = pca.transform(train_images_r)
test_images_r = pca.transform(test_images_r)

**Preprocessamento: normalização e alteração do tipo dos dados de numpy array para pandas dataframe:**

In [50]:
#esse conjunto rr sera utilizado pela rede neural
train_images_rr = normalize(train_images_r)
test_images_rr = normalize(test_images_r)
train_images_rr = pd.DataFrame(data=train_images_rr)
test_images_rr = pd.DataFrame(data=test_images_rr)

#esse conjunto rr sera utilizado posteriormente para a clusterizacao 
train_images_r = pd.DataFrame(data=train_images_r)
test_images_r= pd.DataFrame(data=test_images_r)

**Definição da estrutura do modelo por meio do Keras Sequential que receberá os dados com dimensões reduzidas pelo PCA:**

In [51]:
model_r = keras.Sequential([
    keras.layers.Dense(64,activation=tf.nn.relu,input_dim=(3)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(32, activation=tf.nn.softmax)
])

model_r.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 64)                256       
_________________________________________________________________
dense_13 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_14 (Dense)             (None, 32)                4128      
Total params: 12,704
Trainable params: 12,704
Non-trainable params: 0
_________________________________________________________________


**Compilação do modelo:**

In [52]:
model_r.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

**Normalização dos dados e Treino do modelo:**

In [53]:
model_r.fit(train_images_rr, train_labels, epochs=10, validation_split=0.2, shuffle=True)

Train on 48000 samples, validate on 12000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x13da6cad0>

**Verificação dos resultados do modelo:**

In [54]:
test_loss, test_acc = model_r.evaluate(test_images_rr, test_labels)
print("Model - 3 layers - test loss:", test_loss * 100)
print("Model - 3 layers - test accuracy:", test_acc * 100)

Model - 3 layers - test loss: 99.40232433319092
Model - 3 layers - test accuracy: 61.28000020980835


------------------------------------------------------

## Redução de Dimensionalidade Usando Autoencoders

In [55]:
# this is the size of our encoded representations
encoding_dim = 2
input_img = Input(shape=(784,))
encoded = Dense(encoding_dim, activation='relu')(input_img)
encoder = Model(input_img, encoded)
decoded = Dense(784, activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)
encoded_input = Input(shape=(encoding_dim,))
encoded_input = Input(shape=(encoding_dim,))
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

In [56]:
# create a placeholder for an encoded (32-dimensional) input
encoded_input = Input(shape=(encoding_dim,))
# retrieve the last layer of the autoencoder model
decoder_layer = autoencoder.layers[-1]
# create the decoder model
decoder = Model(encoded_input, decoder_layer(encoded_input))

In [57]:
#REMOVER troquei o 'adadelta' pelo 'adam'
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
autoencoder.fit(train_images, train_images,epochs=10,shuffle=True,validation_data=(test_images, test_images))

In [59]:
encoded_imgs_train = encoder.predict(train_images)
encoded_imgs_test = encoder.predict(test_images)


In [60]:
train_images_a = pd.DataFrame(data=encoded_imgs_train)
test_images_a =pd.DataFrame(data=encoded_imgs_test)

In [61]:
model_r = keras.Sequential([
    keras.layers.Dense(64,activation=tf.nn.relu,input_dim=(2)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(32, activation=tf.nn.softmax)
])

model_r.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 64)                192       
_________________________________________________________________
dense_16 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_17 (Dense)             (None, 32)                4128      
Total params: 12,640
Trainable params: 12,640
Non-trainable params: 0
_________________________________________________________________


In [62]:
model_r.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
model_r.fit(train_images_a, train_labels, epochs=10, validation_split=0.2, shuffle=True)

Train on 48000 samples, validate on 12000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

In [None]:
test_loss, test_acc = model_r.evaluate(test_images_a, test_labels)
print("Model - 3 layers - test loss:", test_loss * 100)
print("Model - 3 layers - test accuracy:", test_acc * 100)

------------------------------------------------------

## Clustering com K-means

**Optamos pela utilização das imagens reduzidas por meio do PCA, já que obtiveram por meio deste modelo os melhores resultados de predição na rede neural.**

In [None]:
train_images_k = train_images_r
test_images_k = test_images_r

**Construção do modelo do k-means com 10 clusters.**

In [None]:
kmean_model = KMeans(n_clusters=10, random_state=10)
y_km = kmean_model.fit(train_images_k)
labels = kmean_model.labels_

In [None]:
cluster_centroids = kmean_model.cluster_centers_

In [None]:
silhouette_score(X = train_images_k, labels = labels, random_state = 10)

In [None]:
metrics.v_measure_score(train_labels.values.ravel(), labels, beta=1.0)

---------------------------------
## Clustering com o DBScan
