In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
import seaborn as sns
from matplotlib import offsetbox


%matplotlib inline

In [None]:
def plot_embedding(X, title=None, labels=None):
    """Plot a 2-D embedding (e.g. the t-SNE output) as colored label text.

    Every column of ``X`` is min-max scaled to [0, 1]; each row is then drawn
    as the text of its label at the row's first two coordinates.

    Parameters
    ----------
    X : array-like of shape (n_samples, >= 2)
        Embedded points; only the first two columns are plotted.
    title : str, optional
        Title for the figure. No title is set when ``None``.
    labels : sequence of int, optional
        One class label per row of ``X``. When omitted, the notebook-level
        ``digits.target`` is used, so the original two-argument call keeps
        working.

    Returns
    -------
    None
    """
    if labels is None:
        # Backward-compatible fallback to the notebook global.
        labels = digits.target

    X = np.asarray(X)
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    span = x_max - x_min
    # A constant column would give 0/0 -> NaN coordinates; leave it at 0 instead.
    span = np.where(span == 0, 1, span)
    X = (X - x_min) / span

    fig, ax = plt.subplots(figsize=(12, 10))
    # tab10 has exactly ten entries. The previous 9-entry Set1 map clipped the
    # integer index 9 to its last color, so labels 8 and 9 were drawn alike.
    palette = plt.cm.tab10
    for i, (x0, x1) in enumerate(X[:, :2]):
        label = labels[i]
        ax.text(x0, x1, str(label),
                color=palette(int(label) % palette.N),
                fontdict={'weight': 'bold', 'size': 9})
    if title is not None:
        ax.set_title(title)

In [None]:
# Load the digits dataset that ships with sklearn.datasets.
digits = load_digits()
# Echo the loaded object as the cell output for inspection.
digits

In [None]:
# Pick row 4 of the feature matrix and echo it as the cell output.
pixels = digits.data[4]
pixels

In [None]:
# Cast the selected sample to unsigned 8-bit integers and fold it into an 8x8 grid.
pixels = np.array(pixels, dtype='uint8').reshape(8, 8)

# Render the grid as a grayscale image.
fig, ax = plt.subplots()
ax.imshow(pixels, cmap='gray')
plt.show()

In [None]:
# Feature matrix (passed through sklearn's `scale`) and the matching class labels.
X, y = scale(digits.data), digits.target

In [None]:
# Echo the scaled feature matrix as the cell output.
X

In [None]:
# Echo the label vector as the cell output.
y

In [None]:
# Report the dimensions of the features and of the labels, one per line.
print(X.shape, y.shape, sep="\n")

In [None]:
# Configure a two-component t-SNE; the fixed random_state pins the seed.
tsne = TSNE(
    n_components=2,
    perplexity=50,
    early_exaggeration=12,
    random_state=42,
)

In [None]:
# Fit t-SNE on the scaled features and keep the resulting embedding.
X_emb = tsne.fit_transform(X)

In [None]:
# Echo the embedded coordinates as the cell output.
X_emb

In [None]:
# Draw the t-SNE embedding with each sample shown as its label.
plot_embedding(X_emb, title="t-SNE embedding of the digits")

In [None]:
# Reduce the scaled features to two principal components, then echo the result.
X_reduced = PCA(
    n_components=2,
    random_state=42,
).fit_transform(X)
X_reduced

In [None]:
# Run k-means with 10 clusters on the PCA-reduced data and keep the
# cluster index assigned to each sample.
kmeans = KMeans(
    n_clusters=10,
    init='k-means++',
    max_iter=300,
    n_init=10,
    random_state=42,
)
y_kmeans = kmeans.fit_predict(X_reduced)

In [None]:
# Plot the k-means clusters in the plane of the two principal components.
# One scatter call per cluster index; the color at position k is used for
# cluster k, and the legend entry is the cluster index itself.
cluster_colors = ['red', 'blue', 'green', 'cyan', 'magenta',
                  'purple', 'brown', 'pink', 'gray', 'olive']
for cluster_id, cluster_color in enumerate(cluster_colors):
    in_cluster = y_kmeans == cluster_id
    plt.scatter(X_reduced[in_cluster, 0], X_reduced[in_cluster, 1],
                s=100, c=cluster_color, label=str(cluster_id))
plt.title('Clusters de números')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()