# **Load dependencies**

In [2]:
from time import time
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import keras.backend as K
import os
import pandas as pd
from keras.layers import Dense, Input, Layer, InputSpec
from keras.models import Model
from keras.optimizers import SGD
from keras.utils.vis_utils import plot_model
from keras import callbacks
from keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.metrics.cluster import normalized_mutual_info_score as nmi, adjusted_rand_score as ari
from scipy.optimize import linear_sum_assignment as linear_assignment
import plotly.io as pio
import plotly.graph_objects as go
from sklearn.decomposition import PCA
import tensorflow as tf
from sklearn.cluster import KMeans, MeanShift, DBSCAN, Birch

#**IDEC**

In [31]:
def autoencoder(dims, act='relu'):
    """
    Fully connected auto-encoder model, symmetric.
    Arguments:
        dims: list of number of units in each layer of encoder. dims[0] is input dim, dims[-1] is units in hidden layer.
            The decoder is symmetric with encoder. So number of layers of the auto-encoder is 2*len(dims)-1
        act: activation, not applied to Input, Hidden and Output layers
    return:
        Model of autoencoder
    """
    n_stacks = len(dims) - 1
    # input
    x = Input(shape=(dims[0],), name='input')
    h = x

    # internal layers in encoder
    for i in range(n_stacks-1):
        h = Dense(dims[i + 1], activation=act, name='encoder_%d' % i)(h)

    # hidden layer
    h = Dense(dims[-1], name='encoder_%d' % (n_stacks - 1))(h)  # hidden layer, features are extracted from here

    # internal layers in decoder
    for i in range(n_stacks-1, 0, -1):
        h = Dense(dims[i], activation=act, name='decoder_%d' % i)(h)

    # output
    h = Dense(dims[0], name='decoder_0')(h)

    return Model(inputs=x, outputs=h)


def acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    ind = linear_assignment(w.max() - w)
    k = 0
    somma = 0

    for i in ind[0]:
      j = ind[1][k]
      somma += w[i,j]
      k += 1 
    
    return somma / y_pred.size

def load_mnist():
    # the data, shuffled and split between train and test sets
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test))
    x = x.reshape((x.shape[0], -1))
    x = np.divide(x, 50.)  # normalize as it does in DEC paper
    print('MNIST samples', x.shape)
    return x, y


class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.
    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
                 q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class IDEC(object):
    def __init__(self,
                 dims,
                 n_clusters=10,
                 alpha=1.0,
                 batch_size=256):

        super(IDEC, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.alpha = alpha
        self.batch_size = batch_size
        self.autoencoder = autoencoder(self.dims)

    def initialize_model(self, ae_weights=None, gamma=0.1, optimizer='adam'):
        if ae_weights is not None:
            self.autoencoder.load_weights('/content/mnist_ae_weights.h5')
            print('Pretrained AE weights are loaded successfully.')
        else:
            print('ae_weights must be given. E.g.')
            print('python IDEC.py mnist --ae_weights weights.h5')
            exit()

        hidden = self.autoencoder.get_layer(name='encoder_%d' % (self.n_stacks - 1)).output
        self.encoder = Model(inputs=self.autoencoder.input, outputs=hidden)

        # prepare IDEC model
        clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(hidden)
        self.model = Model(inputs=self.autoencoder.input,
                           outputs=[clustering_layer, self.autoencoder.output])
        self.model.compile(loss={'clustering': 'kld', 'decoder_0': 'mse'},
                           loss_weights=[gamma, 1],
                           optimizer=optimizer)

    def load_weights(self, weights_path):  # load weights of IDEC model
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        encoder = Model(self.model.input, self.model.get_layer('encoder_%d' % (self.n_stacks - 1)).output)
        return encoder.predict(x)

    def predict_clusters(self, x):  # predict cluster labels using the output of clustering layer
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):  # target distribution P which enhances the discrimination of soft label Q
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def clustering(self, x, y=None,
                   tol=1e-3,
                   update_interval=140,
                   maxiter=2e4,
                   save_dir='/content'):

        print('Update interval'), update_interval
        save_interval = x.shape[0] / self.batch_size * 5  # 5 epochs
        print('Save interval'), save_interval

        # initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = y_pred
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
        '''
        # logging file
        import csv, os
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        
        logfile = csv.file(save_dir + '/idec_log.csv', 'wb')
        
        logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
        logwriter.writeheader()
        '''
        loss = [0, 0, 0]
        index = 0
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q, _ = self.model.predict(x, verbose=0)
                p = self.target_distribution(q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                y_pred = q.argmax(1)
                delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
                y_pred_last = y_pred
                if y is not None:
                    acc_log = np.round(acc(y, np.array(y_pred)), 5)
                    nmi_log = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
                    ari_log = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
                    loss_log = np.round(loss, 5)
                    #logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=loss[0], Lc=loss[1], Lr=loss[2])
                    #logwriter.writerow(logdict)
                    print( 'Iter', ite, ': Acc', acc_log, ', nmi', nmi_log, ', ari', ari_log, '; loss=', loss_log)

                # check stop criterion
                if ite > 0 and delta_label < tol:
                    print('delta_label '), delta_label, '< tol ', tol
                    print('Reached tolerance threshold. Stopping training.')
                    #logfile.close()
                    break

            # train on batch
            if (index + 1) * self.batch_size > x.shape[0]:
                loss = self.model.train_on_batch(x=x[index * self.batch_size::],
                                                 y=[p[index * self.batch_size::], x[index * self.batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(x=x[index * self.batch_size:(index + 1) * self.batch_size],
                                                 y=[p[index * self.batch_size:(index + 1) * self.batch_size],
                                                    x[index * self.batch_size:(index + 1) * self.batch_size]])
                index += 1

            # save intermediate model
            if ite % save_interval == 0:
                # save IDEC model checkpoints
                print('saving model to:'), save_dir + '/IDEC_model_' + str(ite) + '.h5'
                self.model.save_weights(save_dir + '/IDEC_model_' + str(ite) + '.h5')

            ite += 1

        # save the trained model
        #logfile.close()
        print('saving model to:'), save_dir + '/IDEC_model_final.h5'
        self.model.save_weights(save_dir + '/IDEC_model_final.h5')
        
        return y_pred

# **Load data**

In [4]:
x, y = load_mnist()
n_clusters = len(np.unique(y))

MNIST samples (70000, 784)


# **Set parameters**

In [5]:
batch_size = 256
ae_weights = '/content/mnist_ae_weights.h5'
gamma = 0.1
optimizer = 'adam'
tol = 1e-3
update_interval = 140
maxiter = 2e4
save_dir = '/content'

# **Run model**

In [32]:
idec = IDEC(dims=[x.shape[-1], 500, 500, 2000, 10], n_clusters=n_clusters, batch_size=batch_size)
idec.initialize_model(ae_weights=ae_weights, gamma=gamma, optimizer=optimizer)


Pretrained AE weights are loaded successfully.


In [33]:
# begin clustering, time not include pretraining part.
t0 = time()
y_pred = idec.clustering(x, y=y, tol=tol, maxiter=maxiter,
                         update_interval=update_interval, save_dir=save_dir)
print('acc:', acc(y, y_pred))
print('clustering time: ', (time() - t0))

Update interval
Save interval
Initializing cluster centers with k-means.
Iter 0 : Acc 0.81859 , nmi 0.74761 , ari 0.70475 ; loss= [0 0 0]
saving model to:
Iter 140 : Acc 0.81429 , nmi 0.74101 , ari 0.69594 ; loss= [0.33045 0.12205 0.31824]
Iter 280 : Acc 0.81457 , nmi 0.74375 , ari 0.69641 ; loss= [0.4037  0.18083 0.38561]
Iter 420 : Acc 0.81836 , nmi 0.75052 , ari 0.70449 ; loss= [0.37462 0.19914 0.35471]
Iter 560 : Acc 0.82316 , nmi 0.757 , ari 0.71435 ; loss= [0.39101 0.23201 0.36781]
Iter 700 : Acc 0.82754 , nmi 0.7652 , ari 0.72259 ; loss= [0.36228 0.21679 0.3406 ]
Iter 840 : Acc 0.83241 , nmi 0.77193 , ari 0.7311 ; loss= [0.33855 0.22952 0.3156 ]
Iter 980 : Acc 0.83624 , nmi 0.77834 , ari 0.74008 ; loss= [0.32477 0.2257  0.3022 ]
Iter 1120 : Acc 0.83881 , nmi 0.78319 , ari 0.74425 ; loss= [0.34869 0.25381 0.32331]
Iter 1260 : Acc 0.84 , nmi 0.78831 , ari 0.74847 ; loss= [0.33281 0.22696 0.31011]
Iter 1400 : Acc 0.8431 , nmi 0.792 , ari 0.75395 ; loss= [0.33033 0.23982 0.30635]
It

Visualize IDEC clusters

In [39]:
Z_idec = idec.extract_feature(x)
x_tsne_idec = TSNE(n_components=2).fit_transform(Z_idec)
vis_x_idec = x_tsne_idec[:, 0]
vis_y_idec = x_tsne_idec[:, 1]



In [40]:
data = [
    go.Scatter(
        x= vis_x_idec[:10000], 
        y= vis_y_idec[:10000], 
        mode="markers",
        showlegend=False,
        marker=dict(
            size=6,
            color = y[:10000],
            colorscale=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"],
            showscale=True, 
            line = dict(
                width = 1,
                color = 'rgb(255, 255, 255)'
            )))]

layout = go.Layout(
    title= 'Clusters with k-means on IDEC features',
    hovermode= 'closest',
    xaxis= dict(
         title= 'First principal component',
        ticklen= 8,
        zeroline= False,
        gridwidth= 0.1,
    ),
    yaxis=dict(
        title= 'Second principal component',
        ticklen= 8,
        gridwidth= 0.1,
    ),
    showlegend= True
)

fig = dict(data = data, layout = layout)
pio.show(fig, filename="IDEC_features_cluster_plot")

#**DEC**

In [34]:
class DEC(object):
    def __init__(self,
                 dims,
                 n_clusters=10,
                 alpha=1.0,
                 batch_size=256):

        super(DEC, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.alpha = alpha
        self.batch_size = batch_size
        self.autoencoder = autoencoder(self.dims)

    def initialize_model(self, optimizer, ae_weights=None):
        if ae_weights is not None:  # load pretrained weights of autoencoder
            self.autoencoder.load_weights('/content/mnist_ae_weights.h5')
        else:
            print('ae_weights must be given. E.g.')
            print('python DEC.py mnist --ae_weights weights.h5')
            exit()

        hidden = self.autoencoder.get_layer(name='encoder_%d' % (self.n_stacks - 1)).output
        self.encoder = Model(inputs=self.autoencoder.input, outputs=hidden)

        # prepare DEC model
        clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(hidden)
        self.model = Model(inputs=self.autoencoder.input, outputs=clustering_layer)
        self.model.compile(loss='kld', optimizer=optimizer)

    def load_weights(self, weights_path):  # load weights of DEC model
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        encoder = Model(self.model.input, self.model.get_layer('encoder_%d' % (self.n_stacks - 1)).output)
        return encoder.predict(x)

    def predict_clusters(self, x):  # predict cluster labels using the output of clustering layer
        q = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def clustering(self, x, y=None,
                   tol=1e-3,
                   update_interval=140,
                   maxiter=2e4,
                   save_dir='/content/'):

        print('Update interval', update_interval)
        save_interval = x.shape[0] / self.batch_size * 5  # 5 epochs
        print('Save interval', save_interval)

        # initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = y_pred
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

        # logging file
        import csv, os
        '''
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        logfile = file(save_dir + '/dec_log.csv', 'wb')
        logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L'])
        logwriter.writeheader()
        '''
        loss = 0
        index = 0
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q = self.model.predict(x, verbose=0)
                p = self.target_distribution(q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                y_pred = q.argmax(1)
                delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
                y_pred_last = y_pred
                if y is not None:
                    acc_log = np.round(acc(y, y_pred), 5)
                    nmi_log = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
                    ari_log = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
                    loss_log = np.round(loss, 5)
                    #logdict = dict(iter=ite, acc=acc_log, nmi=nmi_log, ari=ari_log, L=loss_log)
                    #logwriter.writerow(logdict)
                    print('Iter', ite, ': Acc', acc_log, ', nmi', nmi_log, ', ari', ari_log, '; loss=', loss_log)

                # check stop criterion
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    #logfile.close()
                    break

            # train on batch
            if (index + 1) * self.batch_size > x.shape[0]:
                loss = self.model.train_on_batch(x=x[index * self.batch_size::],
                                                 y=p[index * self.batch_size::])
                index = 0
            else:
                loss = self.model.train_on_batch(x=x[index * self.batch_size:(index + 1) * self.batch_size],
                                                 y=p[index * self.batch_size:(index + 1) * self.batch_size])
                index += 1

            # save intermediate model
            if ite % save_interval == 0:
                # save IDEC model checkpoints
                print('saving model to:', save_dir + '/DEC_model_' + str(ite) + '.h5')
                self.model.save_weights(save_dir + '/DEC_model_' + str(ite) + '.h5')

            ite += 1

        # save the trained model
        #logfile.close()
        print('saving model to:', save_dir + '/DEC_model_final.h5')
        self.model.save_weights(save_dir + '/DEC_model_final.h5')

        return y_pred


In [35]:
dec = DEC(dims=[x.shape[-1], 500, 500, 2000, 10], n_clusters=n_clusters, batch_size=batch_size)

dec.initialize_model(optimizer=SGD(lr=0.01, momentum=0.9),
                      ae_weights=ae_weights)
plot_model(dec.model, to_file='dec_model.png', show_shapes=True)
dec.model.summary()


  super().__init__(name, **kwargs)


Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 784)]             0         
                                                                 
 encoder_0 (Dense)           (None, 500)               392500    
                                                                 
 encoder_1 (Dense)           (None, 500)               250500    
                                                                 
 encoder_2 (Dense)           (None, 2000)              1002000   
                                                                 
 encoder_3 (Dense)           (None, 10)                20010     
                                                                 
 clustering (ClusteringLayer  (None, 10)               100       
 )                                                               
                                                          

In [36]:
t0 = time()
y_pred = dec.clustering(x, y=y, tol=tol, maxiter=maxiter,
                        update_interval=update_interval, save_dir=save_dir)
print('acc:', acc(y, y_pred))
print('clustering time: ', (time() - t0))

Update interval 140
Save interval 1367.1875
Initializing cluster centers with k-means.
Iter 0 : Acc 0.8186 , nmi 0.74753 , ari 0.70462 ; loss= 0
saving model to: /content/DEC_model_0.h5
Iter 140 : Acc 0.82553 , nmi 0.75806 , ari 0.71726 ; loss= 0.03492
Iter 280 : Acc 0.82876 , nmi 0.76588 , ari 0.72617 ; loss= 0.09382
Iter 420 : Acc 0.83519 , nmi 0.77881 , ari 0.74008 ; loss= 0.10191
Iter 560 : Acc 0.84186 , nmi 0.79207 , ari 0.75456 ; loss= 0.10438
Iter 700 : Acc 0.84749 , nmi 0.80115 , ari 0.76572 ; loss= 0.09377
Iter 840 : Acc 0.85037 , nmi 0.80649 , ari 0.77151 ; loss= 0.08328
Iter 980 : Acc 0.8535 , nmi 0.81183 , ari 0.77789 ; loss= 0.07853
Iter 1120 : Acc 0.85563 , nmi 0.81567 , ari 0.78218 ; loss= 0.07452
Iter 1260 : Acc 0.85751 , nmi 0.81958 , ari 0.78633 ; loss= 0.08128
Iter 1400 : Acc 0.85886 , nmi 0.82209 , ari 0.78927 ; loss= 0.09509
Iter 1540 : Acc 0.85977 , nmi 0.82358 , ari 0.791 ; loss= 0.06594
Iter 1680 : Acc 0.86054 , nmi 0.82529 , ari 0.79266 ; loss= 0.05737
Iter 182

Visualize Clusters

In [37]:
Z_dec = dec.extract_feature(x)
x_tsne = TSNE(n_components=2).fit_transform(Z_dec)
vis_x_dec = x_tsne[:, 0]
vis_y_dec = x_tsne[:, 1]



In [38]:
data = [
    go.Scatter(
        x= vis_x_dec[:10000], 
        y= vis_y_dec[:10000], 
        mode="markers",
        showlegend=False,
        marker=dict(
            size=6,
            color = y[:10000],
            colorscale=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"],
            showscale=True, 
            line = dict(
                width = 1,
                color = 'rgb(255, 255, 255)'
            )))]

layout = go.Layout(
    title= 'Clusters with k-means on DEC features',
    hovermode= 'closest',
    xaxis= dict(
         title= 'First principal component',
        ticklen= 8,
        zeroline= False,
        gridwidth= 0.1,
    ),
    yaxis=dict(
        title= 'Second principal component',
        ticklen= 8,
        gridwidth= 0.1,
    ),
    showlegend= True
)

fig = dict(data = data, layout = layout)
pio.show(fig, filename="DEC_features_cluster_plot")

Classic Clusterig

In [46]:
pca = PCA()
Z_pca = pca.fit_transform(x)
kmeans = KMeans(n_clusters = 10, n_init=20)
y_pred_PCA_kmeans = kmeans.fit_predict(Z_pca)
acc(y, y_pred_PCA_kmeans), nmi(y,y_pred_PCA_kmeans)

(0.5348285714285714, 0.49990657785489706)

In [47]:
x_tsne_pca = TSNE(n_components=2).fit_transform(Z_pca)
vis_x_pca = x_tsne_pca[:, 0]
vis_y_pca = x_tsne_pca[:, 1]

In [48]:
data = [
    go.Scatter(
        x= vis_x_pca[:10000], 
        y= vis_y_pca[:10000], 
        mode="markers",
        showlegend=False,
        marker=dict(
            size=6,
            color = y[:10000],
            colorscale=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"],
            showscale=True, 
            line = dict(
                width = 1,
                color = 'rgb(255, 255, 255)'
            )))]

layout = go.Layout(
    title= 'Cluster with k-means on PCA features',
    hovermode= 'closest',
    xaxis= dict(
         title= 'First principal TSNE component',
        ticklen= 8,
        zeroline= False,
        gridwidth= 0.1,
    ),
    yaxis=dict(
        title= 'Second principal TSNE component',
        ticklen= 8,
        gridwidth= 0.1,
    ),
    showlegend= True
)
import plotly.io as pio

fig = dict(data = data, layout = layout)
pio.show(fig, filename="PCA_features_cluster_plot")