In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import cluster
from sklearn.metrics import confusion_matrix
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [2]:
class DataVisuals:
    """Plotting helpers for an image dataset and a clustering of it.

    Parameters
    ----------
    dset : indexable collection of images; each item is handed to ``imshow``.
    dlabel : indexable collection of labels aligned with ``dset``; used as
        subplot titles.
    predlabel : indexable collection produced by the clustering step.
        ``view_lblsample`` compares its items with a cluster id, while
        ``view_mean10`` reshapes its first ten items into images.
        NOTE(review): these two uses expect different contents (per-sample
        cluster ids vs. per-cluster mean vectors) -- confirm with the caller
        which one is passed.
    dsetname : dataset name, case-insensitive. ``'mnist'`` and ``'cifar10'``
        select the image shape used by ``view_mean10``; for any other name
        ``self.dimension`` is ``None``.
    algor : algorithm name, stored lower-cased in ``self.algor``.
    """

    # Image shape used by view_mean10 for each recognised dataset name.
    _DIMENSIONS = {'mnist': (28, 28), 'cifar10': (32, 32, 3)}

    def __init__(self, dset, dlabel, predlabel, dsetname = 'MNIST', algor = 'KMeans'):
        self.algor = algor.lower()
        self.dset = dset
        self.dlabel = dlabel
        self.predlabel = predlabel
        self.dsetname = dsetname.lower()
        self.eva = {}
        # Always define the attribute so later methods can report a clear
        # error instead of failing with AttributeError.
        self.dimension = self._DIMENSIONS.get(self.dsetname)

    def view_rsample(self):
        """Show a 5x5 grid of images drawn uniformly at random (with
        replacement) from the dataset, each titled with its ``dlabel`` entry.

        Raises
        ------
        ValueError
            If the dataset is empty.
        """
        n_samples = len(self.dset)
        if n_samples == 0:
            raise ValueError('cannot sample from an empty dataset')

        fig, axes = plt.subplots(5, 5, figsize=(6, 7))
        fig.suptitle('Random sample from the dataset', fontsize=14, fontweight='bold')
        for ax in axes.ravel():
            # choice(n) draws from arange(n) without rebuilding an
            # n-element array from a range object on every iteration.
            i = np.random.choice(n_samples)
            ax.set_title(self.dlabel[i])
            ax.axis('off')
            ax.imshow(self.dset[i])
        plt.show()

    def view_lblsample(self, label):
        """Show a 5x5 grid of random images whose ``predlabel`` entry equals
        ``label``, each titled with its ``dlabel`` entry.

        Images are drawn with replacement, so a cluster with fewer than 25
        members shows repeats.

        Raises
        ------
        ValueError
            If ``predlabel`` is not one-dimensional or no sample has the
            requested label (the previous rejection-sampling loop never
            terminated in the latter case).
        """
        assigned = np.asarray(self.predlabel)
        if assigned.ndim != 1:
            raise ValueError('predlabel must hold one cluster id per sample')
        candidates = np.flatnonzero(assigned == label)
        if candidates.size == 0:
            raise ValueError(f'no samples were assigned to cluster {label!r}')
        picks = np.random.choice(candidates, size=25)

        fig, axes = plt.subplots(5, 5, figsize = (6, 7))
        fig.suptitle(f'Random sample from the {label}th cluster', fontsize=14, fontweight='bold')
        # ravel() walks the grid row by row, the same order as a nested
        # row/column loop.
        for ax, i in zip(axes.ravel(), picks):
            ax.set_title(self.dlabel[i])
            ax.axis('off')
            ax.imshow(self.dset[i])
        plt.show()

    def view_mean10(self):
        """Show the first ten items of ``predlabel`` as images, each reshaped
        to ``self.dimension``, under the title 'Cluster means'.

        The images fill a 3x4 grid whose bottom-left and bottom-right cells
        are left blank, giving a 4 + 4 + 2 layout.

        Raises
        ------
        ValueError
            If the dataset name given at construction has no known image
            shape.
        """
        if self.dimension is None:
            raise ValueError(
                f'no image shape known for dataset {self.dsetname!r}; '
                'set self.dimension before calling view_mean10'
            )

        fig, axes = plt.subplots(3, 4, figsize = (6, 5))
        fig.suptitle('Cluster means', fontsize=14, fontweight='bold')
        blank_cells = {(2, 0), (2, 3)}
        i = 0
        for row in range(3):
            for col in range(4):
                ax = axes[row][col]
                ax.axis('off')
                if (row, col) in blank_cells:
                    continue
                ax.imshow(self.predlabel[i].reshape(self.dimension))
                i += 1
        plt.show()

In [None]:
    def scat(self, latent, max_points=5000):
        """Scatter-plot the first two principal components of ``latent``,
        coloured by the labels in ``self.dlabel``.

        PCA is fitted on all rows of ``latent``; only the leading rows are
        drawn. One scatter series is drawn per label value 0..9 (ten fixed
        colours); samples with any other label are not shown.

        Parameters
        ----------
        latent : 2-D array-like accepted by ``PCA.fit_transform``; row ``i``
            is taken to belong to ``self.dlabel[i]``.
        max_points : maximum number of leading rows to draw. Defaults to
            5000; ``None`` draws every row that has a label.
        """
        pca = PCA(n_components=2)
        pca_result = pca.fit_transform(latent)

        # Only rows that have both a projection and a label can be drawn.
        # Clamping avoids indexing past the end of pca_result when latent is
        # shorter than the label list.
        n_points = min(len(pca_result), len(self.dlabel))
        if max_points is not None:
            n_points = min(n_points, max_points)
        points = pca_result[:n_points]
        labels = np.asarray(self.dlabel[:n_points])

        fig, ax = plt.subplots(figsize=(10,10))
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

        for no, color in enumerate(colors):
            # A boolean mask selects this label's rows in one vectorised
            # step instead of a Python loop over every sample.
            mask = labels == no
            ax.scatter(points[mask, 0], points[mask, 1], c=color, label = no,
                       edgecolors='none')

        ax.set_xlabel('principal component 1')
        ax.set_ylabel('principal component 2')
        ax.legend()

        plt.show()

In [None]:
    def cm(self):
        """Display the confusion matrix of ``self.dlabel`` against
        ``self.predlabel`` as an image.

        ``confusion_matrix`` is called with ``dlabel`` as the true labels and
        ``predlabel`` as the predictions, so rows (y axis) correspond to the
        labelled data and columns (x axis) to the cluster numbers.
        """
        # Named `matrix` so the local does not shadow this method's own name.
        matrix = confusion_matrix(self.dlabel, self.predlabel)

        fig_cnvtx, ax_cnvtx = plt.subplots()
        ax_cnvtx.matshow(matrix)
        # Two plain statements: chaining the calls with `and` only set the
        # y label because set_xlabel's return value happened to be truthy.
        ax_cnvtx.set_xlabel('cluster number')
        ax_cnvtx.set_ylabel('labeled data')
        plt.show()

    def evaluate_km(self, model=None):
        """Fill ``self.eva`` with clustering scores computed against
        ``self.dlabel``.

        Parameters
        ----------
        model : optional fitted estimator exposing ``labels_``, ``inertia_``
            and ``n_iter_``. When omitted, ``self.model`` is used if that
            attribute exists; otherwise the scores are computed from
            ``self.predlabel`` and the two model-only entries ('distortion'
            and 'Itritions') are not written.
            NOTE(review): the fallback assumes ``self.predlabel`` holds one
            cluster id per sample -- confirm against the caller.

        The previous version read an undefined global ``labels_pred`` and a
        ``self.model`` attribute that the constructor never sets, so it
        could not run as written.
        """
        if model is None:
            model = getattr(self, 'model', None)
        labels = self.predlabel if model is None else model.labels_

        self.eva['adjusted_rand_score'] = cluster.adjusted_rand_score(self.dlabel, labels)
        self.eva['mutual_info_score'] = cluster.mutual_info_score(self.dlabel, labels)
        self.eva['adjusted_mutual_info_score'] = cluster.adjusted_mutual_info_score(self.dlabel, labels)
        self.eva['homogeneity_completeness_v_measure'] = cluster.homogeneity_completeness_v_measure(self.dlabel, labels)
        self.eva['completeness_score'] = cluster.completeness_score(self.dlabel, labels)

        if model is not None:
            self.eva['distortion'] = model.inertia_
            # Key spelling kept as-is so existing readers of self.eva still
            # find it.
            self.eva['Itritions'] = model.n_iter_

In [None]:
class Metrics:
    """Clustering quality scores for one pair of true and predicted
    labellings.

    Parameters
    ----------
    y_true : true labels, one per sample.
    y_pred : predicted cluster ids, one per sample.
    """

    def __init__(self, y_true, y_pred):
        self.y_true = y_true
        self.y_pred = y_pred


    def nmi(self):
        """Return ``normalized_mutual_info_score(y_true, y_pred)``."""
        return normalized_mutual_info_score(self.y_true, self.y_pred)


    def ari(self):
        """Return ``adjusted_rand_score(y_true, y_pred)``."""
        return adjusted_rand_score(self.y_true, self.y_pred)


    def acc(self):
        """
        Calculate clustering accuracy under the best one-to-one matching of
        cluster ids to true labels. Requires SciPy.

        Uses ``self.y_true`` and ``self.y_pred``: array-likes of
        non-negative integer labels with the same number of elements.
        Neither attribute is modified.

        # Return
            accuracy, in [0,1]
        # Raises
            AssertionError if the two labellings differ in size.
            ValueError if they are empty.
        """
        # sklearn.utils.linear_assignment_ has been removed from
        # scikit-learn; SciPy's solver finds the same optimal matching.
        from scipy.optimize import linear_sum_assignment

        # Cast both sides to integers so they can be used as matrix indices.
        y_true = np.asarray(self.y_true).ravel().astype(np.int64)
        y_pred = np.asarray(self.y_pred).ravel().astype(np.int64)
        assert y_pred.size == y_true.size
        if y_pred.size == 0:
            raise ValueError('cannot compute accuracy of an empty labelling')

        D = max(y_pred.max(), y_true.max()) + 1
        # w[p, t] counts the samples placed in cluster p whose true label is t.
        w = np.zeros((D, D), dtype=np.int64)
        # add.at accumulates repeated (p, t) pairs, unlike `w[p, t] += 1`.
        np.add.at(w, (y_pred, y_true), 1)

        # The solver minimises cost, so turn counts into costs to maximise
        # the number of matched samples.
        rows, cols = linear_sum_assignment(w.max() - w)
        return w[rows, cols].sum() * 1.0 / y_pred.size