## Importando Bibliotecas

In [None]:
import tensorflow as tf 
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Input, Dense
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
from keras.optimizers import Adam
from keras.models import Model
import sklearn as sk
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score

## Pre-processamento dos dados
**Carregando os dados de treino e teste:**

In [None]:
# Read the Fashion-MNIST train/test splits from the CSV files one directory up.
# Both frames still carry their 'label' column at this point.
train_images, test_images = (
    pd.read_csv(csv_path)
    for csv_path in ('../fashion-mnist_train.csv', '../fashion-mnist_test.csv')
)

**Separando as labels dos conjuntos:**

In [None]:
# Split each frame into its target column and its feature columns.
# Labels are kept as single-column DataFrames (not Series).
# NOTE: the feature names are rebound in place, so re-running this cell
# without reloading the CSVs raises KeyError ('label' is already gone).
train_labels = train_images[['label']]
train_images = train_images.drop(columns=['label'])
test_labels = test_images[['label']]
test_images = test_images.drop(columns=['label'])

**Normalizando os data-sets:**

In [None]:
# Scale every feature by 1/255 (presumably mapping 0-255 pixel intensities
# into [0, 1] -- confirm against the CSV contents).
train_images = train_images.div(255.0)
test_images = test_images.div(255.0)


----------------------
## Construção do modelo base
**Esta rede será utilizada posteriormente para comparações entre os modelos de dimensionalidade reduzida utilizando PCA e autoencoder.**

**Definição da estrutura do modelo por meio do Keras Sequential:**

In [None]:
# Baseline classifier on the full 784-feature input:
# 784 -> Dense(64, relu) -> Dense(128, relu) -> Dense(10, softmax).
model = keras.Sequential()
model.add(keras.layers.Dense(64, activation=tf.nn.relu, input_dim=784))
model.add(keras.layers.Dense(128, activation=tf.nn.relu))
model.add(keras.layers.Dense(10, activation=tf.nn.softmax))

model.summary()

**Compilação do modelo:**

In [None]:
# Adam optimizer with sparse categorical cross-entropy loss; accuracy is
# tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

**Treino do modelo:**

In [None]:
# Train the baseline for 5 epochs, reserving 20% of the training data for
# validation.
model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_split=0.2,
    shuffle=True,
)

**Verificação dos resultados do modelo:**

In [None]:
# Score the baseline on the test split. evaluate() yields the loss followed by
# the compiled metric (accuracy); both are printed scaled by 100.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print("Model - 3 layers - test loss:", 100 * test_loss)
print("Model - 3 layers - test accuracy:", 100 * test_acc)

-----------------------------
## Redução de Dimensionalidade Usando Principal component analysis (PCA)

**Definição do número de componentes:**

In [None]:
# Project the images onto 3 principal components.
# random_state pins the result when scikit-learn selects a randomized SVD
# solver, so the reduced features are reproducible across kernel restarts
# (same seed value as the K-means model in this notebook).
pca = PCA(n_components=3, random_state=10)

**Construção dos novos datasets de treino e de teste com dimensões reduzidas usando o PCA:**

In [None]:
# Fit the PCA projection on the training images only, then apply that same
# projection to both splits. Refitting on the test images would express the
# test features in a different basis than the one the classifier is trained
# on, and would leak test-set statistics into the preprocessing.
pca.fit(train_images)
train_images_r = pca.transform(train_images)
test_images_r = pca.transform(test_images)

**Alteração do tipo dos dados de numpy array para pandas dataframe:**

In [None]:
# Wrap the PCA-reduced outputs in DataFrames (default integer column names).
train_images_r = pd.DataFrame(train_images_r)
test_images_r = pd.DataFrame(test_images_r)

**Definição da estrutura do modelo por meio do Keras Sequential que receberá os dados com dimensões reduzidas pelo PCA:**

In [None]:
# Classifier for the PCA-reduced features. Same hidden/output layout as the
# baseline; the input width is read from the reduced training frame so this
# cell keeps working if the number of PCA components is changed.
model_r = keras.Sequential([
    keras.layers.Dense(64, activation=tf.nn.relu, input_dim=train_images_r.shape[1]),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])

model_r.summary()

**Compilação do modelo:**

In [None]:
# Same training configuration as the baseline: Adam, sparse categorical
# cross-entropy, accuracy metric.
model_r.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

**Treino do modelo:**

In [None]:
# Train on the PCA-reduced features for 5 epochs with a 20% validation split.
model_r.fit(
    train_images_r,
    train_labels,
    epochs=5,
    validation_split=0.2,
    shuffle=True,
)

**Verificação dos resultados do modelo:**

In [None]:
# Score the PCA-fed classifier on the reduced test split; loss and accuracy
# are printed scaled by 100.
test_loss, test_acc = model_r.evaluate(test_images_r, test_labels)
print("Model - 3 layers - test loss:", 100 * test_loss)
print("Model - 3 layers - test accuracy:", 100 * test_acc)

------------------------------------------------------

## Redução de Dimensionalidade Usando Autoencoders

In [None]:
# Size of the encoded (bottleneck) representation.
encoding_dim = 2

# Encoder: 784 inputs -> `encoding_dim` ReLU units.
input_img = Input(shape=(784,))
encoded = Dense(encoding_dim, activation='relu')(input_img)
encoder = Model(input_img, encoded)

# Autoencoder: the encoder followed by a 784-unit sigmoid reconstruction layer.
decoded = Dense(784, activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)

# Stand-alone decoder that reuses the autoencoder's last (reconstruction)
# layer, so it shares weights with the trained autoencoder.
encoded_input = Input(shape=(encoding_dim,))
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))

autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

In [None]:
# Input placeholder for an encoded vector of length `encoding_dim`.
encoded_input = Input(shape=(encoding_dim,))
# The autoencoder's final layer is the one that maps an encoding back to the
# 784-value reconstruction.
decoder_layer = autoencoder.layers[-1]
# Decoder model: encoded vector -> reconstruction through that shared layer.
decoder = Model(inputs=encoded_input, outputs=decoder_layer(encoded_input))

In [None]:
# REMOVE: swap 'adadelta' for 'adam' (the autoencoder is compiled with Adam here).
autoencoder.compile(
    loss='binary_crossentropy',
    optimizer='adam',
)

In [None]:
# Train the autoencoder to reconstruct its own input (inputs double as
# targets) for 5 epochs; the test images are used as the validation set.
autoencoder.fit(
    train_images,
    train_images,
    epochs=5,
    shuffle=True,
    validation_data=(test_images, test_images),
)

In [None]:
# Run both splits through the encoder to obtain their low-dimensional codes.
encoded_imgs_train, encoded_imgs_test = (
    encoder.predict(images) for images in (train_images, test_images)
)


In [None]:
# Wrap the encoder outputs in DataFrames (default integer column names).
train_images_a = pd.DataFrame(encoded_imgs_train)
test_images_a = pd.DataFrame(encoded_imgs_test)

In [None]:
# Classifier for the autoencoder-encoded features. NOTE: this rebinds
# `model_r`, replacing the PCA-fed classifier defined earlier in the notebook.
# The input width follows `encoding_dim` so it stays in sync with the encoder
# if the bottleneck size is changed.
model_r = keras.Sequential([
    keras.layers.Dense(64, activation=tf.nn.relu, input_dim=encoding_dim),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])

model_r.summary()

In [None]:
# Adam, sparse categorical cross-entropy, accuracy metric.
model_r.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the encoded features for 5 epochs with a 20% validation split.
model_r.fit(
    train_images_a,
    train_labels,
    epochs=5,
    validation_split=0.2,
    shuffle=True,
)

In [None]:
# Score the autoencoder-fed classifier on the encoded test split; loss and
# accuracy are printed scaled by 100.
test_loss, test_acc = model_r.evaluate(test_images_a, test_labels)
print("Model - 3 layers - test loss:", 100 * test_loss)
print("Model - 3 layers - test accuracy:", 100 * test_acc)

------------------------------------------------------

## Clustering com K-means

Optamos pela utilização das imagens reduzidas por meio do PCA, já que obtiveram por meio deste modelo os melhores resultados de predição na rede neural.

In [None]:
# Use the PCA-reduced frames as the clustering inputs. These are aliases of
# the same objects, not copies.
train_images_k, test_images_k = train_images_r, test_images_r

In [None]:
# Fit a 10-cluster K-means model (fixed seed) on the reduced training data.
kmean_model = KMeans(random_state=10, n_clusters=10)
# fit() returns the estimator itself, so `y_km` is the fitted model object,
# not an array of cluster labels.
y_km = kmean_model.fit(train_images_k)
# Cluster index assigned to each training sample.
labels = y_km.labels_

In [None]:
# Coordinates of the fitted cluster centres, one row per cluster.
cluster_centroids = kmean_model.cluster_centers_

In [None]:
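# Disabled alternative: silhouette score computed against the ground-truth labels instead of the K-means cluster assignments.
#silhouette_avg = silhouette_score(X = train_images_k, labels = train_labels.values.ravel(), random_state=10)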

In [None]:
# Mean silhouette coefficient of the K-means assignments.
# X must be the 2-D (n_samples, n_features) data; the previous code passed the
# bound method `.values.ravel` (never called), which is not array-like, and
# flattening the features would have been wrong for this metric anyway.
silhouette_avg = silhouette_score(X=train_images_k, labels=labels)

In [None]:
# Display the average silhouette score as the cell output.
silhouette_avg

---------------------------------
## Clustering com o DBScan
