In [None]:
# Notebook chrome only: stretch the page container to full width and centre
# cell outputs. `display` and `HTML` are taken from the public
# `IPython.display` module; importing `display` from `IPython.core.display`
# is a deprecated path.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; } .output { align-items: center; }</style>"))

In [1]:
import numpy as np
from scipy.spatial import distance_matrix

def pmatrix(self, som_map=None, color='Viridis', idata=[], perc=18, interp='best', title=''):
    """P-matrix: for every unit, count the input vectors within a fixed radius.

    The radius is the ``perc``-th percentile of the full pairwise distance
    matrix of ``idata`` (the zero diagonal and both triangles are included
    in the percentile).

    Parameters
    ----------
    som_map : optional
        Existing figure to update. When given, ``som_map.data[0].z`` is
        overwritten and nothing is returned.
    color, interp, title :
        Forwarded unchanged to ``self.plot``.
    idata : array-like
        Input vectors, one per row. Must be supplied; the empty default
        cannot be processed by ``distance_matrix``.
    perc : float
        Percentile (0-100) of the pairwise distances used as radius.

    Returns
    -------
    Whatever ``self.plot`` returns when ``som_map`` is None, otherwise None.
    """
    # Pairwise distances between all inputs: len(idata) x len(idata) values.
    in_distmat = distance_matrix(idata, idata)
    radius = np.percentile(in_distmat, perc)

    # Rows are units, columns are input vectors.
    distmat = distance_matrix(self.weights, idata)
    pm = np.sum(distmat <= radius, axis=-1).reshape(self.m, self.n)

    # Identity check: `== None` would dispatch to som_map.__eq__.
    if som_map is None:
        return self.plot(pm, color=color, interp=interp, title=title)
    som_map.data[0].z = pm

In [2]:
from scipy.ndimage import correlate

def umatrix(self, som_map=None, color='Viridis', interp='best', title=''):
    """U-matrix on a ``(2m - 1) x (2n - 1)`` grid.

    Cells between two units hold the Euclidean distance between the weight
    vectors of the units on either side (horizontal or vertical neighbours).
    Cells where row and column index have the same parity (the unit
    positions themselves and the diagonal gaps) are filled with the mean of
    their four direct neighbours in the grid.

    Parameters
    ----------
    som_map : optional
        Existing figure to update. When given, ``som_map.data[0].z`` is
        overwritten and nothing is returned.
    color, interp, title :
        Forwarded unchanged to ``self.plot``.

    Returns
    -------
    Whatever ``self.plot`` returns when ``som_map`` is None, otherwise None.
    """
    m, n = self.m, self.n
    mm, nn = m * 2 - 1, n * 2 - 1

    # Every unit index repeated as a 2x2 tile -> (2m, 2n) grid of unit ids.
    idx1 = np.kron(np.arange(m * n).reshape(m, n), np.ones((2, 2)))

    # Offsets selecting the partner unit: +1 on odd columns (right
    # neighbour), +n on odd rows (neighbour below).
    k1 = np.array([0, 1] * n)[None, :]
    k2 = np.array([0, n] * m)[:, None]
    idx2 = idx1 + k1 + k2

    # Dropping the last row/column removes the out-of-range partner indices.
    idx1 = idx1[:-1, :-1].astype(np.int32).ravel()
    idx2 = idx2[:-1, :-1].astype(np.int32).ravel()

    dist = np.sqrt(np.sum(np.square(self.weights[idx1] - self.weights[idx2]), axis=-1))
    um = dist.reshape(mm, nn)

    # 4-neighbourhood averaging kernel.
    w = [[0, 1, 0],
         [1, 0, 1],
         [0, 1, 0]]

    corr = correlate(um, w) / 4

    # True where row and column parity agree. Built vectorised with the
    # builtin bool dtype; the `np.bool` alias no longer exists in NumPy.
    mask = (np.arange(mm)[:, None] & 1) == (np.arange(nn)[None, :] & 1)

    um[mask] = corr[mask]

    # Identity check: `== None` would dispatch to som_map.__eq__.
    if som_map is None:
        return self.plot(um, color=color, interp=interp, title=title)
    som_map.data[0].z = um

In [3]:
from scipy.ndimage import zoom

def usmatrix(self, som_map=None, color='Viridis', idata=[], perc=10, interp='best', title=''):
    """U*-matrix: the U-matrix re-weighted by the (rescaled) P-matrix.

    The P-matrix is resampled with ``scipy.ndimage.zoom`` to the shape of
    the U-matrix, then each U-matrix cell is multiplied by
    ``(P - mean(P)) / (mean(P) - min(P))``.

    Parameters
    ----------
    som_map : optional
        Existing figure to update. When given, ``som_map.data[0].z`` is
        overwritten and nothing is returned.
    color, interp, title :
        Forwarded unchanged to ``self.plot``.
    idata, perc :
        Forwarded to ``self.pmatrix``.

    Returns
    -------
    Whatever ``self.plot`` returns when ``som_map`` is None, otherwise None.
    """
    # Both helpers are called in "plot" mode and the matrices are read back
    # from the returned figure.
    pm = self.pmatrix(idata=idata, perc=perc).data[0].z
    um = self.umatrix().data[0].z

    # Per-axis zoom factors that bring the P-matrix to the U-matrix shape.
    factors = [u / p for u, p in zip(um.shape, pm.shape)]
    pm = zoom(pm, factors)

    # NOTE: a constant P-matrix makes the denominator 0 and the result NaN.
    pm_mean = np.mean(pm)
    usm = um * (pm - pm_mean) / (pm_mean - np.min(pm))

    # Identity check: `== None` would dispatch to som_map.__eq__.
    if som_map is None:
        return self.plot(usm, color=color, interp=interp, title=title)
    som_map.data[0].z = usm

In [4]:
def qe(self, som_map=None, color='Viridis', idata=[], interp='best', title=''):
    """Quantization error per unit.

    Every input vector is assigned to its closest unit; a unit's value is
    the sum of the distances of all vectors assigned to it (0 for units
    that win no vector).

    Parameters
    ----------
    som_map : optional
        Existing figure to update. When given, ``som_map.data[0].z`` is
        overwritten and nothing is returned.
    color, interp, title :
        Forwarded unchanged to ``self.plot``.
    idata : array-like
        Input vectors, one per row.

    Returns
    -------
    Whatever ``self.plot`` returns when ``som_map`` is None, otherwise None.
    """
    # dist[u, k]: distance between unit u and input vector k.
    dist = distance_matrix(self.weights, idata)
    bmu = np.argmin(dist, axis=0)

    # won[u, k] is True when unit u is the best-matching unit of vector k.
    won = np.arange(self.m * self.n)[:, None] == bmu[None, :]
    unit_qe = np.sum(np.where(won, dist, 0), axis=-1).reshape(self.m, self.n)

    # Identity check: `== None` would dispatch to som_map.__eq__.
    if som_map is None:
        return self.plot(unit_qe, color=color, interp=interp, title=title)
    som_map.data[0].z = unit_qe

In [5]:
def mqe(self, som_map=None, color='Viridis', idata=[], interp='best', title=''):
    """Mean quantization error per unit.

    Every input vector is assigned to its closest unit; a unit's value is
    the mean distance of the vectors assigned to it. Units that win no
    vector get 0.

    Parameters
    ----------
    som_map : optional
        Existing figure to update. When given, ``som_map.data[0].z`` is
        overwritten and nothing is returned.
    color, interp, title :
        Forwarded unchanged to ``self.plot``.
    idata : array-like
        Input vectors, one per row.

    Returns
    -------
    Whatever ``self.plot`` returns when ``som_map`` is None, otherwise None.
    """
    # dist[u, k]: distance between unit u and input vector k.
    dist = distance_matrix(self.weights, idata)
    bmu = np.argmin(dist, axis=0)

    # won[u, k] is True when unit u is the best-matching unit of vector k.
    won = np.arange(self.m * self.n)[:, None] == bmu[None, :]
    summed = np.sum(np.where(won, dist, 0), axis=-1)
    hits = np.sum(won, axis=-1)

    # `where=` skips units without hits, leaving the pre-filled 0 in `out`.
    mean_qe = np.divide(summed, hits, out=np.zeros_like(summed), where=hits != 0)
    mean_qe = mean_qe.reshape(self.m, self.n)

    # Identity check: `== None` would dispatch to som_map.__eq__.
    if som_map is None:
        return self.plot(mean_qe, color=color, interp=interp, title=title)
    som_map.data[0].z = mean_qe

In [6]:
import plotly.graph_objects as go
from ipywidgets import Layout, HBox, Box, widgets, interact


class SomViz:
    """Plotly visualisations for a self-organising map.

    ``weights`` is indexed as a 2-D array with one row per unit
    (``m * n`` rows). Unit ``k`` is drawn at row ``k // n`` and column
    ``k % n`` of an ``m x n`` heatmap.

    Most methods follow the same convention: with ``som_map=None`` they
    return a new figure built by :meth:`plot`; otherwise they overwrite
    ``som_map.data[0].z`` in place and return None.
    """

    def __init__(self, weights=[], m=None, n=None):
        self.weights = weights
        self.m = m
        self.n = n

    # Visualisations defined as module-level functions, attached as methods.
    umatrix = umatrix
    pmatrix = pmatrix
    usmatrix = usmatrix
    qe = qe
    mqe = mqe

    def dmatrix(self, som_map=None, color="Viridis", interp="best", title=""):
        """Per-unit mean weight distance to the units at grid distance <= 1.

        The neighbourhood includes the unit itself (grid distance 0), so a
        zero distance is part of every mean.
        """
        um = np.zeros((self.m * self.n, 1))
        # Grid coordinates in the same row-major order as the weight rows.
        neuron_locs = [np.array([i, j]) for i in range(self.m) for j in range(self.n)]
        neuron_distmat = distance_matrix(neuron_locs, neuron_locs)

        for i in range(self.m * self.n):
            neighbor_idxs = neuron_distmat[i] <= 1
            neighbor_weights = self.weights[neighbor_idxs]
            um[i] = distance_matrix(np.expand_dims(self.weights[i], 0), neighbor_weights).mean()

        if som_map is None:
            return self.plot(um.reshape(self.m, self.n), color=color, interp=interp, title=title)
        som_map.data[0].z = um.reshape(self.m, self.n)

    def hithist(self, som_map=None, idata=[], color="RdBu", interp="best", title=""):
        """Hit histogram: number of input vectors whose closest unit is each unit."""
        hist = [0] * self.n * self.m
        for v in idata:
            position = np.argmin(np.sqrt(np.sum(np.power(self.weights - v, 2), axis=1)))
            hist[position] += 1

        hits = np.array(hist).reshape(self.m, self.n)
        if som_map is None:
            return self.plot(hits, color=color, interp=interp, title=title)
        som_map.data[0].z = hits

    def component_plane(self, som_map=None, component=0, color="Viridis", interp="best", title=""):
        """Heatmap of a single weight component (column ``component``) over the grid."""
        plane = self.weights[:, component].reshape(-1, self.n)
        if som_map is None:
            return self.plot(plane, color=color, interp=interp, title=title)
        # The update branch previously reshaped with an undefined name `n`.
        som_map.data[0].z = plane

    def sdh(self, som_map=None, idata=[], sdh_type=1, factor=1, draw=True, color="Cividis", interp="best", title=""):
        """Smoothed data histogram over the ``factor`` closest units of each vector.

        ``sdh_type`` selects the contribution added to each of those units:

        * 1 - rank based: ``(factor - rank) / (factor * (factor + 1) / 2)``
        * 2 - inverse distance: ``1 / distance``
        * 3 - distance rescaled by the min/max distance over all units:
          ``1 - (distance - dmin) / (dmax - dmin)``

        ``draw`` is accepted for compatibility and is not used.
        """
        import heapq
        sdh_m = [0] * self.m * self.n

        # Sum of the rank weights factor, factor - 1, ..., 1.
        cs = sum(factor - i for i in range(factor))

        for vector in idata:
            dist = np.sqrt(np.sum(np.power(self.weights - vector, 2), axis=1))
            # Indices of the `factor` closest units, closest first.
            c = heapq.nsmallest(factor, range(len(dist)), key=dist.__getitem__)
            if sdh_type == 1:
                for j in range(factor):
                    sdh_m[c[j]] += (factor - j) / cs  # normalized
            if sdh_type == 2:
                for j in range(factor):
                    sdh_m[c[j]] += 1.0 / dist[c[j]]  # based on distance
            if sdh_type == 3:
                # Both extremes are loop invariant; compute them once.
                dmin, dmax = min(dist), max(dist)
                for j in range(factor):
                    sdh_m[c[j]] += 1.0 - (dist[c[j]] - dmin) / (dmax - dmin)

        grid = np.array(sdh_m).reshape(-1, self.n)
        if som_map is None:
            return self.plot(grid, color=color, interp=interp, title=title)
        som_map.data[0].z = grid

    def project_data(self, som_m=None, idata=[], title=""):
        """Add a scatter trace to ``som_m`` marking the closest unit of every vector.

        Nothing happens when ``som_m`` is None. ``title`` is not used.
        """
        data_y = []
        data_x = []
        for v in idata:
            position = np.argmin(np.sqrt(np.sum(np.power(self.weights - v, 2), axis=1)))
            # Flat unit index -> (column, row).
            x, y = position % self.n, position // self.n
            data_x.append(x)
            data_y.append(y)

        if som_m is not None:
            som_m.add_trace(go.Scatter(x=data_x, y=data_y, mode="markers", marker_color="rgba(255, 255, 255, 0.8)"))

    def time_series(self, som_m=None, idata=[], wsize=50, title=""):
        """Show the sequence of best-matching units next to the map.

        Builds a scatter of (sample index, best-matching unit index), adds
        the jittered unit positions as a trace to ``som_m`` and returns both
        figures side by side in an ``HBox``. ``som_m`` must be a figure;
        ``title`` is not used.
        """
        data_y = []
        data_x = list(range(len(idata)))

        data_x2 = []
        data_y2 = []

        qmin = np.inf  # `np.Inf` alias no longer exists in NumPy 2
        qmax = 0

        ps = []
        for v in idata:
            matrix = np.sqrt(np.sum(np.power(self.weights - v, 2), axis=1))
            position = np.argmin(matrix)
            qerror = matrix[position]
            if qmin > qerror: qmin = qerror
            if qmax < qerror: qmax = qerror
            ps.append((position, qerror))

        markerc = []
        for v in ps:
            data_y.append(v[0])
            # Opacity proportional to the quantization error; 0 when every
            # error is 0 (avoids a 0/0 division).
            rez = v[1] / qmax if qmax > 0 else 0.0

            markerc.append('rgba(0, 0, 0, '+str(rez)+')')

            # x is the column (0..n-1), y the row (0..m-1); the edge tests
            # therefore compare x with n - 1 and y with m - 1.
            # NOTE(review): on an edge only the *other* coordinate is
            # jittered, as in the original logic - confirm this is intended.
            x, y = v[0] % self.n, v[0] // self.n
            if    x == 0: y = np.random.uniform(low=y, high=y+.1)
            elif  x == self.n - 1: y = np.random.uniform(low=y-.1, high=y)
            elif  y == 0: x = np.random.uniform(low=x, high=x+.1)
            elif  y == self.m - 1: x = np.random.uniform(low=x-.1, high=x)
            else: x, y = np.random.uniform(low=x-.1, high=x+.1), np.random.uniform(low=y-.1, high=y+.1)

            data_x2.append(x)
            data_y2.append(y)

        # NOTE(review): both `marker_color` and `marker.color` (500 random
        # values) are passed; which one plotly applies should be verified.
        ts_plot = go.FigureWidget(go.Scatter(x=[], y=[], mode="markers", marker_color=markerc, marker=dict(colorscale="Viridis", showscale=True, color=np.random.randn(500))))
        ts_plot.update_xaxes(range=[0, wsize])

        ts_plot.data[0].x, ts_plot.data[0].y = data_x, data_y
        som_m.add_trace(go.Scatter(x=data_x2, y=data_y2, mode="markers"))

        som_m.layout.height = 500
        ts_plot.layout.height = 500
        som_m.layout.width = 500
        ts_plot.layout.width = 1300

        return HBox([go.FigureWidget(som_m), go.FigureWidget(ts_plot)])

    def plot(self, matrix, color="Viridis", interp="best", title=""):
        """Return a heatmap ``FigureWidget`` of ``matrix``.

        Height is fixed at 700 px and the width is scaled by ``n / m``.
        """
        return go.FigureWidget(go.Heatmap(z=matrix, zsmooth=interp, colorscale=color), layout=go.Layout(width=700*self.n/self.m,title=title, height=700, title_x=0.5))

In [7]:
import pandas as pd
import gzip


class SOMToolBox_Parse:
    """Reader for SOMToolBox-style vector / weight text files.

    Lines starting with ``$`` carry metadata (``$XDIM``, ``$YDIM``,
    ``$VEC_DIM`` are used, others are ignored); every other non-blank line
    is a data row whose first ``$VEC_DIM`` whitespace-separated tokens are
    parsed as floats. Files whose name ends in ``.gz`` are read through
    ``gzip``.
    """

    def __init__(self, filename):
        self.filename = filename

    def read_weight_file(self,):
        """Parse ``self.filename``.

        Returns
        -------
        (DataFrame, int, int, int)
            The data as a float64 frame with ``xdim * ydim`` rows and
            ``vec_dim`` columns, followed by ``vec_dim``, ``xdim``, ``ydim``.

        Raises
        ------
        ValueError
            If data rows appear before the dimensions are known, or a row
            cannot be stored as ``vec_dim`` floats.
        """
        df = pd.DataFrame()
        opener = gzip.open if self.filename.endswith('.gz') else open
        # The context manager closes the file; no explicit close() needed.
        with opener(self.filename, 'rb') as file:
            df, vec_dim, xdim, ydim = self._read_vector_file_to_df(df, file)
        return df.astype('float64'), vec_dim, xdim, ydim

    def _read_vector_file_to_df(self, df, file):
        """Consume ``file`` line by line, filling ``df`` once its size is known."""
        xdim, ydim, vec_dim, position = 0, 0, 0, 0
        for byte in file:
            line = byte.decode('UTF-8')
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            if line.startswith('$'):
                xdim, ydim, vec_dim = self._parse_vector_file_metadata(line, xdim, ydim, vec_dim)
                # Re-created on later '$' lines while it still has no
                # columns, i.e. until $VEC_DIM has been seen as well.
                if xdim > 0 and ydim > 0 and len(df.columns) == 0:
                    df = pd.DataFrame(index=range(0, ydim * xdim), columns=range(0, vec_dim))
            else:
                if len(df.columns) == 0 or vec_dim == 0:
                    raise ValueError('Weight file has no correct Dimensional information.')
                position = self._parse_weight_file_data(line, position, vec_dim, df)
        return df, vec_dim, xdim, ydim

    def _parse_weight_file_data(self, line, position, vec_dim, df):
        """Store one data row at ``position`` and return the next position."""
        splitted = line.split()
        try:
            values = np.array(splitted[0:vec_dim]).astype(float)
            if len(values) != vec_dim:
                # A short row must not be silently broadcast across the columns.
                raise ValueError('expected %d values, found %d' % (vec_dim, len(values)))
            # Positional row assignment on the frame itself; writing through
            # `df.values` fails when pandas hands out a read-only array.
            df.iloc[position] = values
            position += 1
        except Exception as exc:
            raise ValueError('The input-vector file does not match its unit-dimension.') from exc
        return position

    def _parse_vector_file_metadata(self, line, xdim, ydim, vec_dim):
        """Update the dimension triple from one ``$KEY value`` line."""
        splitted = line.split()
        if len(splitted) < 2:
            # Key without value: nothing to read.
            return xdim, ydim, vec_dim
        key, value = splitted[0], splitted[1]
        if key == '$XDIM':      xdim = int(value)
        elif key == '$YDIM':    ydim = int(value)
        elif key == '$VEC_DIM': vec_dim = int(value)
        return xdim, ydim, vec_dim

In [8]:
import minisom as som
from sklearn.preprocessing import MinMaxScaler

# Grid sizes as (m, n) pairs. The "small" pair is passed to MiniSom and SomViz
# when training on chainlink; the "large" pair is reserved for the
# larger maps that are still to be added.
small_m, small_n = 40, 20
large_m, large_n = 100, 60

In [9]:
# Train
# Load the chainlink vectors and rescale them with MinMaxScaler. Each stage
# keeps its own name; `chainlink` is the scaled array used by later cells.
chainlink_parser = SOMToolBox_Parse('datasets/chainlink.vec')
chainlink_raw, _, _, _ = chainlink_parser.read_weight_file()
chainlink = MinMaxScaler().fit_transform(chainlink_raw)
chainlink_dim = chainlink.shape[-1]

# Fixed seed so that "Restart & Run All" reproduces the same map.
chainlink_small = som.MiniSom(small_m, small_n, chainlink_dim, sigma=0.8, learning_rate=0.7, random_seed=42)
chainlink_small.train_random(chainlink, 10000, verbose=True)

# Visualization
# Flatten the (m, n, dim) weight grid to one row per unit, using the public
# accessor instead of the private `_weights` attribute.
chainlink_small_viz = SomViz(chainlink_small.get_weights().reshape(-1, chainlink_dim), small_m, small_n)

 [ 10000 / 10000 ] 100% 0.00945 it/s

In [10]:
# P-matrices of the small chainlink map for four radius percentiles,
# computed in order and shown side by side.
(chainlink_small_pm1,
 chainlink_small_pm5,
 chainlink_small_pm18,
 chainlink_small_pm50) = (
    chainlink_small_viz.pmatrix(color='viridis', idata=chainlink, perc=perc, title=f'P-matrix ({perc} percentile)')
    for perc in (1, 5, 18, 50)
)

display(HBox([chainlink_small_pm1, chainlink_small_pm5, chainlink_small_pm18, chainlink_small_pm50]))

HBox(children=(FigureWidget({
    'data': [{'colorscale': [[0.0, '#440154'], [0.1111111111111111, '#482878'],
…

In [11]:
# U-matrix of the small chainlink map, followed by U*-matrices for four
# P-matrix percentiles, shown side by side.
chainlink_small_um = chainlink_small_viz.umatrix(color='viridis', title='U-matrix')

(chainlink_small_usm1,
 chainlink_small_usm5,
 chainlink_small_usm18,
 chainlink_small_usm50) = (
    chainlink_small_viz.usmatrix(color='viridis', idata=chainlink, perc=perc, title=f'U*-matrix ({perc} percentile)')
    for perc in (1, 5, 18, 50)
)

display(HBox([chainlink_small_um, chainlink_small_usm1, chainlink_small_usm5, chainlink_small_usm18, chainlink_small_usm50]))

HBox(children=(FigureWidget({
    'data': [{'colorscale': [[0.0, '#440154'], [0.1111111111111111, '#482878'],
…

In [12]:
# Quantization error (sum per unit) and mean quantization error (mean per
# unit) of the small chainlink map, shown side by side.
chainlink_small_qe = chainlink_small_viz.qe(color='viridis', idata=chainlink, title='Quantization error ')
chainlink_small_mqe = chainlink_small_viz.mqe(color='viridis', idata=chainlink, title='Mean quantization error ')

display(HBox([chainlink_small_qe, chainlink_small_mqe]))

HBox(children=(FigureWidget({
    'data': [{'colorscale': [[0.0, '#440154'], [0.1111111111111111, '#482878'],
…

In [13]:
# Same process for large chainlink, small clusters and large clusters here

In [14]:
# Train
# Load the clusters vectors and rescale them with MinMaxScaler.
# Note: `clusters` is rebound three times (parser -> DataFrame -> scaled array);
# only the final scaled array is meant to be used afterwards.
clusters = SOMToolBox_Parse('datasets/clusters.vec')
clusters, _, _, _ = clusters.read_weight_file()
clusters = MinMaxScaler().fit_transform(clusters)
clusters_dim = clusters.shape[-1]

# code for training here

# Visualization


In [15]:
# Read from SOMToolBox
# Input vectors; `iris` is rebound from the parser to the parsed DataFrame.
iris = SOMToolBox_Parse('datasets/iris.vec')
iris, _, _, _ = iris.read_weight_file()

# Pre-trained weights; read_weight_file returns (frame, vec_dim, xdim, ydim).
iris_pretrained = SOMToolBox_Parse('datasets/iris.wgt.gz')
iris_pretrained, iris_dim, iris_m, iris_n = iris_pretrained.read_weight_file()

# Visualization
# NOTE(review): $XDIM is used as m (rows) and $YDIM as n (columns) - confirm
# this matches the unit ordering of the weight file.
iris_viz = SomViz(iris_pretrained.values.reshape(-1, iris_dim), iris_m, iris_n)

In [16]:
# visualizations for iris here