In [1]:
import pandas as pd
import numpy as np
import gzip


class SOMToolBox_Parse:
    """Parser for text files made of ``$``-prefixed header lines and vector rows.

    Of the header lines only ``$XDIM``, ``$YDIM`` and ``$VEC_DIM`` are
    interpreted; every other ``$...`` line is ignored.  Each remaining
    non-blank line is one vector: whitespace-separated numbers, of which the
    first ``$VEC_DIM`` are kept (trailing tokens such as labels are dropped).
    File names ending in ``.gz`` are read through ``gzip``.
    """

    def __init__(self, filename):
        # Path of the file that read_weight_file() will parse.
        self.filename = filename

    def read_weight_file(self,):
        """Parse ``self.filename``.

        Returns:
            tuple: ``(df, vec_dim, xdim, ydim)``.  ``df`` is a float64
            DataFrame with ``xdim * ydim`` rows and ``vec_dim`` columns; rows
            for which the file held no data stay NaN.

        Raises:
            ValueError: if a data row appears before the dimensions are known,
                or a data row does not fit the declared dimensions.
        """
        opener = gzip.open if str(self.filename).endswith('.gz') else open
        # The context manager closes the handle; no explicit close() is needed.
        with opener(self.filename, 'rb') as file:
            df, vec_dim, xdim, ydim = self._read_vector_file_to_df(pd.DataFrame(), file)
        return df.astype('float64'), vec_dim, xdim, ydim

    def _read_vector_file_to_df(self, df, file):
        """Consume the binary line iterator ``file`` and fill a DataFrame.

        ``df`` is the frame to start from (an empty one in normal use); it is
        replaced by a NaN-filled frame as soon as the dimensions are known.
        Returns ``(df, vec_dim, xdim, ydim)``.
        """
        xdim, ydim, vec_dim, position = 0, 0, 0, 0
        for raw_line in file:
            line = raw_line.decode('UTF-8')
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            if line.startswith('$'):
                xdim, ydim, vec_dim = self._parse_vector_file_metadata(line, xdim, ydim, vec_dim)
                # (Re)allocate while the frame still has no columns, so the
                # header lines may arrive in any order.
                if xdim > 0 and ydim > 0 and len(df.columns) == 0:
                    df = pd.DataFrame(np.full((ydim * xdim, vec_dim), np.nan),
                                      index=range(0, ydim * xdim),
                                      columns=range(0, vec_dim))
            else:
                if len(df.columns) == 0 or vec_dim == 0:
                    raise ValueError('Weight file has no correct Dimensional information.')
                position = self._parse_weight_file_data(line, position, vec_dim, df)
        return df, vec_dim, xdim, ydim

    def _parse_weight_file_data(self, line, position, vec_dim, df):
        """Store the first ``vec_dim`` numbers of ``line`` in row ``position`` of ``df``.

        Returns the next free row position.

        Raises:
            ValueError: if the row is not numeric, is shorter than ``vec_dim``,
                or ``position`` lies beyond the rows declared in the header.
        """
        tokens = line.split()
        try:
            values = np.array(tokens[0:vec_dim]).astype(float)
        except ValueError as err:
            raise ValueError('The input-vector file does not match its unit-dimension.') from err
        if len(values) != vec_dim or position >= len(df):
            raise ValueError('The input-vector file does not match its unit-dimension.')
        # Assign through .iloc: the array returned by df.values is not a
        # guaranteed-writable view (it is read-only under pandas Copy-on-Write).
        df.iloc[position, :] = values
        return position + 1

    def _parse_vector_file_metadata(self, line, xdim, ydim, vec_dim):
        """Update the dimensions from one header line.

        Returns ``(xdim, ydim, vec_dim)``; values are passed through unchanged
        when the line is not one of ``$XDIM``, ``$YDIM`` or ``$VEC_DIM`` or
        carries no value.
        """
        splitted = line.split()
        if len(splitted) < 2:
            return xdim, ydim, vec_dim
        key, value = splitted[0], splitted[1]
        if key == '$XDIM':
            xdim = int(value)
        elif key == '$YDIM':
            ydim = int(value)
        elif key == '$VEC_DIM':
            vec_dim = int(value)
        return xdim, ydim, vec_dim

In [2]:
import plotly.graph_objects as go
from ipywidgets import Layout, HBox, Box, widgets, interact
from scipy.spatial import distance_matrix, distance


# `display` and `HTML` are part of the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Inject CSS that stretches elements with class `container` to the full width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [57]:
def dmatrix(self, som_map=None, color='Viridis', interp='best', title=''):
    """Mean weight-space distance of every unit to its grid neighbourhood.

    The neighbourhood of a unit is every unit at Chebyshev grid distance <= 1.
    That includes the unit itself, so one zero distance is part of each mean.

    Args:
        som_map: figure whose first trace is updated in place; when ``None`` a
            new figure is created through ``self.plot``.
        color, interp, title: forwarded to ``self.plot``.

    Returns:
        The result of ``self.plot`` when ``som_map`` is ``None``, else ``None``.
    """
    n_units = self.m * self.n

    # Grid coordinates in the same row-major order in which the weights are indexed.
    grid = [np.array([row, col]) for row in range(self.m) for col in range(self.n)]
    grid_dist = distance.cdist(grid, grid, 'chebyshev')

    dm = np.zeros(n_units)
    for unit in range(n_units):
        neighbour_weights = self.weights[grid_dist[unit] <= 1]
        dm[unit] = distance_matrix([self.weights[unit]], neighbour_weights).mean()
    dm = dm.reshape(self.m, self.n)

    if som_map is None:
        return self.plot(dm, color=color, interp=interp, title=title)
    som_map.data[0].z = dm

In [58]:
def pmatrix(self, som_map=None, color='Viridis', idata=[], perc=18, interp='best', title=''):
    """Per-unit count of input vectors lying within a data-derived radius.

    The radius is the ``perc``-th percentile of all pairwise distances between
    the input vectors (the full square matrix, zero diagonal included).  Each
    unit's value is the number of input vectors whose distance to the unit's
    weight vector is <= that radius.

    Args:
        som_map: figure whose first trace is updated in place; when ``None`` a
            new figure is created through ``self.plot``.
        idata: input vectors, one per row (must be non-empty).
        perc: percentile in [0, 100] used to derive the radius.
        color, interp, title: forwarded to ``self.plot``.

    Returns:
        The result of ``self.plot`` when ``som_map`` is ``None``, else ``None``.
    """
    radius = np.percentile(distance_matrix(idata, idata), perc)

    # One row per unit, one column per input vector.
    unit_to_data = distance_matrix(self.weights, idata)
    counts = np.count_nonzero(unit_to_data <= radius, axis=1)
    # Keep the float dtype that the matrix has always had.
    pm = counts.astype(float).reshape(self.m, self.n)

    if som_map is None:
        return self.plot(pm, color=color, interp=interp, title=title)
    som_map.data[0].z = pm

In [198]:
from scipy.ndimage import correlate

def umatrix(self, som_map=None, color='Viridis', interp='best', title=''):
    """Expanded neighbour-distance matrix of shape ``(3*m - 2, 3*n - 2)``.

    Every unit is blown up to a 3x3 patch.  Each patch cell holds the Euclidean
    distance between the unit's weight vector and that of the grid neighbour at
    the matching offset (the centre cell compares the unit with itself and is
    therefore 0).  The outermost ring of the expanded grid is cut off, which
    removes exactly the offsets that would point outside the map.  Finally the
    cells at (even, even) and (odd, odd) coordinates are overwritten with a
    quarter of the sum of their four edge-adjacent cells.

    NOTE(review): the patches repeat with period 3 while the overwrite step
    uses stride 2, and the result is not ``m x n`` like the matrices produced
    by ``dmatrix``/``pmatrix`` - confirm that this layout is intended.

    Args:
        som_map: figure whose first trace is updated in place; when ``None`` a
            new figure is created through ``self.plot``.
        color, interp, title: forwarded to ``self.plot``.

    Returns:
        The result of ``self.plot`` when ``som_map`` is ``None``, else ``None``.
    """
    m, n = self.m, self.n

    # Flat unit index repeated over a 3x3 patch per unit -> shape (3m, 3n).
    unit_ids = np.kron(np.arange(m * n).reshape(m, n), np.ones((3, 3)))

    # Offsets inside a patch: -1/0/+1 column and -n/0/+n (one grid row) in the
    # flat, row-major unit index.
    col_offsets = np.array([-1, 0, 1] * n)[None, :]
    row_offsets = np.array([-n, 0, n] * m)[:, None]
    neighbour_ids = unit_ids + col_offsets + row_offsets

    # Drop the outer ring, then flatten for fancy indexing into the weights.
    unit_ids = unit_ids[1:-1, 1:-1].astype(np.int32).ravel()
    neighbour_ids = neighbour_ids[1:-1, 1:-1].astype(np.int32).ravel()

    dist = np.sqrt(np.sum(np.square(self.weights[unit_ids] - self.weights[neighbour_ids]), axis=-1))
    um = dist.reshape(m * 3 - 2, n * 3 - 2)

    # Cross-shaped kernel: sum of the four edge-adjacent cells.
    w = [[0, 1, 0],
         [1, 0, 1],
         [0, 1, 0]]
    corr = correlate(um, w) / 4

    # `corr` was computed before any overwrite, so both passes read the
    # original distances.
    for start in (0, 1):
        rows = np.arange(start, m * 3 - 2, 2)[:, None]
        cols = np.arange(start, n * 3 - 2, 2)[None, :]
        um[rows, cols] = corr[rows, cols]

    if som_map is None:
        return self.plot(um, color=color, interp=interp, title=title)
    som_map.data[0].z = um

In [144]:
# a = np.kron(np.arange(10).reshape(5, 2), np.ones((3, 3)))

# k1 = np.array([-1, 0, 1] * 2)[None, :]
# k2 = np.array([-2, 0, 2] * 5)[:, None]
# b = (a + k1 + k2)[1:-1, 1:-1]
# a = a[1:-1, 1:-1]

# i = np.arange(0, 5 * 3 - 2, 3)[:, None]
# j = np.arange(0, 2 * 3 - 2, 3)[None, :]
# #b[i, j] = np.ones((5, 2)) *  10
# b

# Scratch check: build the patch/neighbour index grids for a 10 x 10 map and
# show the neighbour indices.
m = n = 10
idx1 = np.kron(np.arange(m * n).reshape(m, n), np.ones((3, 3)))

# Per-patch offsets in the flat unit index: one column / one grid row.
k1 = np.tile([-1, 0, 1], n).reshape(1, -1)
k2 = np.tile([-n, 0, n], m).reshape(-1, 1)

# Apply the offsets first, then trim the outer ring from both grids.
idx2 = (idx1 + k1 + k2)[1:-1, 1:-1].astype(np.int32)
idx1 = idx1[1:-1, 1:-1].astype(np.int32)

idx2

array([[ 0,  1,  0,  1,  2,  1,  2,  3,  2,  3,  4,  3,  4,  5,  4,  5,
         6,  5,  6,  7,  6,  7,  8,  7,  8,  9,  8,  9],
       [10, 11, 10, 11, 12, 11, 12, 13, 12, 13, 14, 13, 14, 15, 14, 15,
        16, 15, 16, 17, 16, 17, 18, 17, 18, 19, 18, 19],
       [ 0,  1,  0,  1,  2,  1,  2,  3,  2,  3,  4,  3,  4,  5,  4,  5,
         6,  5,  6,  7,  6,  7,  8,  7,  8,  9,  8,  9],
       [10, 11, 10, 11, 12, 11, 12, 13, 12, 13, 14, 13, 14, 15, 14, 15,
        16, 15, 16, 17, 16, 17, 18, 17, 18, 19, 18, 19],
       [20, 21, 20, 21, 22, 21, 22, 23, 22, 23, 24, 23, 24, 25, 24, 25,
        26, 25, 26, 27, 26, 27, 28, 27, 28, 29, 28, 29],
       [10, 11, 10, 11, 12, 11, 12, 13, 12, 13, 14, 13, 14, 15, 14, 15,
        16, 15, 16, 17, 16, 17, 18, 17, 18, 19, 18, 19],
       [20, 21, 20, 21, 22, 21, 22, 23, 22, 23, 24, 23, 24, 25, 24, 25,
        26, 25, 26, 27, 26, 27, 28, 27, 28, 29, 28, 29],
       [30, 31, 30, 31, 32, 31, 32, 33, 32, 33, 34, 33, 34, 35, 34, 35,
        36, 35, 36, 37, 3

In [146]:
def u_star_matrix(self, som_map=None, color='Viridis', idata=[], perc=10, interp='best', title=''):
    """Element-wise mean of the inverted, scaled P-matrix and the scaled U-matrix.

    Both matrices are taken from the first trace of the figures returned by
    ``self.pmatrix`` and ``self.umatrix``.  The P-matrix is mapped to [0, 1]
    with its maximum at 0 (``(max - pm) / range``); the U-matrix is mapped to
    [0, 1] with its minimum at 0.  A matrix with zero value range is scaled to
    all zeros instead of producing NaN.

    Args:
        som_map: figure whose first trace is updated in place; when ``None`` a
            new figure is created through ``self.plot``.
        idata, perc: forwarded to ``self.pmatrix``.
        color, interp, title: forwarded to ``self.plot``.

    Returns:
        The result of ``self.plot`` when ``som_map`` is ``None``, else ``None``.

    Raises:
        ValueError: if the two matrices do not have the same shape.
    """
    pm = np.asarray(self.pmatrix(idata=idata, perc=perc).data[0].z, dtype=float)
    um = np.asarray(self.umatrix().data[0].z, dtype=float)

    if pm.shape != um.shape:
        raise ValueError('P-matrix shape %s and U-matrix shape %s differ; '
                         'they cannot be combined element-wise.' % (pm.shape, um.shape))

    pm_span = np.max(pm) - np.min(pm)
    um_span = np.max(um) - np.min(um)
    # Guard the 0/0 case of a constant matrix.
    pm = (np.max(pm) - pm) / pm_span if pm_span else np.zeros_like(pm)
    um = (um - np.min(um)) / um_span if um_span else np.zeros_like(um)

    usm = np.mean([pm, um], axis=0)

    if som_map is None:
        return self.plot(usm, color=color, interp=interp, title=title)
    som_map.data[0].z = usm

In [199]:
def qe(self, som_map=None, color='Viridis', idata=[], interp='best', title=''):
    """Per-unit quantization error: summed distance of the inputs mapped to each unit.

    Every input vector is assigned to the unit with the smallest distance to
    it; a unit's value is the sum of the distances of its assigned inputs
    (0 for units that received none).

    Args:
        som_map: figure whose first trace is updated in place; when ``None`` a
            new figure is created through ``self.plot``.
        idata: input vectors, one per row.
        color, interp, title: forwarded to ``self.plot``.

    Returns:
        The result of ``self.plot`` when ``som_map`` is ``None``, else ``None``.
    """
    # Rows: units, columns: input vectors.
    dist = distance_matrix(self.weights, idata)
    bmu = np.argmin(dist, axis=0)

    # is_bmu[u, k] is True when unit u is the best-matching unit of input k.
    is_bmu = np.arange(self.m * self.n)[:, None] == bmu[None, :]
    unit_qe = np.sum(np.where(is_bmu, dist, 0), axis=-1).reshape(self.m, self.n)

    if som_map is None:
        return self.plot(unit_qe, color=color, interp=interp, title=title)
    som_map.data[0].z = unit_qe

In [200]:
def mqe(self, som_map=None, color='Viridis', idata=[], interp='best', title=''):
    """Per-unit mean quantization error.

    Every input vector is assigned to the unit with the smallest distance to
    it; a unit's value is the mean distance of the inputs assigned to it.  The
    mean is taken over the unit's own inputs (not over all inputs), and units
    without any assigned input get 0.

    Args:
        som_map: figure whose first trace is updated in place; when ``None`` a
            new figure is created through ``self.plot``.
        idata: input vectors, one per row.
        color, interp, title: forwarded to ``self.plot``.

    Returns:
        The result of ``self.plot`` when ``som_map`` is ``None``, else ``None``.
    """
    # Rows: units, columns: input vectors.
    dist = distance_matrix(self.weights, idata)
    bmu = np.argmin(dist, axis=0)

    # is_bmu[u, k] is True when unit u is the best-matching unit of input k.
    is_bmu = np.arange(self.m * self.n)[:, None] == bmu[None, :]
    error_sum = np.sum(np.where(is_bmu, dist, 0), axis=-1)
    hits = np.count_nonzero(is_bmu, axis=-1)

    # Divide by the number of hits of each unit; leave 0 where a unit has none.
    unit_mqe = np.divide(error_sum, hits, out=np.zeros_like(error_sum), where=hits > 0)
    unit_mqe = unit_mqe.reshape(self.m, self.n)

    if som_map is None:
        return self.plot(unit_mqe, color=color, interp=interp, title=title)
    som_map.data[0].z = unit_mqe

In [201]:
class SomViz:
    """Plotly visualisations for a trained self-organising map.

    ``weights`` holds one weight vector per unit and is indexed as a 2-D NumPy
    array (``weights[:, k]``, ``weights - v``).  Units are laid out row-major
    on a grid of ``m`` rows and ``n`` columns: flat index ``p`` is column
    ``p % n`` and row ``p // n``.

    Methods that take ``som_map`` return a new figure when it is ``None`` and
    otherwise overwrite ``som_map.data[0].z`` in place and return ``None``.
    """

    def __init__(self, weights=[], m=None, n=None):
        self.weights = weights
        self.m = m
        self.n = n

    # Visualisations implemented as module-level functions taking `self`.
    dmatrix = dmatrix
    pmatrix = pmatrix
    umatrix = umatrix
    u_star_matrix = u_star_matrix
    qe = qe
    mqe = mqe

    @staticmethod
    def _as_rows(idata):
        """Return ``idata`` as an array whose first axis runs over input vectors.

        Iterating a DataFrame directly yields its column labels, so the data is
        converted before any per-vector loop.
        """
        return np.asarray(idata)

    def _unit_distances(self, vector):
        """Euclidean distance from ``vector`` to the weight vector of every unit."""
        return np.sqrt(np.sum(np.power(self.weights - vector, 2), axis=1))

    def hithist(self, som_map=None, idata=[], color='RdBu', interp="best", title=""):
        """Hit histogram: number of input vectors whose closest unit is each unit."""
        hist = np.zeros(self.m * self.n, dtype=int)
        for v in self._as_rows(idata):
            hist[np.argmin(self._unit_distances(v))] += 1
        hist = hist.reshape(self.m, self.n)

        if som_map is None:
            return self.plot(hist, color=color, interp=interp, title=title)
        som_map.data[0].z = hist

    def component_plane(self, som_map=None, component=0, color="Viridis", interp="best", title=""):
        """Values of weight component ``component`` arranged on the unit grid."""
        plane = self.weights[:, component].reshape(-1, self.n)

        if som_map is None:
            return self.plot(plane, color=color, interp=interp, title=title)
        som_map.data[0].z = plane

    def sdh(self, som_map=None, idata=[], sdh_type=1, factor=1, draw=True, color="Cividis", interp="best", title=""):
        """Smoothed data histogram over the ``factor`` closest units of each input.

        Args:
            idata: input vectors, one per row.
            sdh_type: voting scheme for the closest units of an input.
                1 - rank based: the k-th closest unit gets ``(factor - k) / cs``
                with ``cs = 1 + 2 + ... + factor``;
                2 - inverse distance ``1 / d`` (infinite if an input coincides
                with a weight vector);
                3 - ``1 - (d - dmin) / (dmax - dmin)`` over the distances of
                the input to all units.
                Any other value leaves the histogram at zero.
            factor: number of closest units that receive a vote.
            draw: accepted for compatibility; not used.
        """
        sdh_m = np.zeros(self.m * self.n)
        cs = sum(range(1, factor + 1))

        for vector in self._as_rows(idata):
            dist = self._unit_distances(vector)
            # Stable sort keeps the lower unit index first on ties.
            nearest = np.argsort(dist, kind='stable')[:max(factor, 0)]

            if sdh_type == 1:
                for rank, unit in enumerate(nearest):
                    sdh_m[unit] += (factor - rank) / cs  # normalized
            if sdh_type == 2:
                for unit in nearest:
                    sdh_m[unit] += 1.0 / dist[unit]  # based on distance
            if sdh_type == 3:
                dmin = dist.min()
                span = dist.max() - dmin
                for unit in nearest:
                    # All units equally far: the relative distance is 0.
                    rel = (dist[unit] - dmin) / span if span else 0.0
                    sdh_m[unit] += 1.0 - rel

        sdh_m = sdh_m.reshape(-1, self.n)
        if som_map is None:
            return self.plot(sdh_m, color=color, interp=interp, title=title)
        som_map.data[0].z = sdh_m

    def project_data(self, som_m=None, idata=[], title=""):
        """Add a marker trace to ``som_m`` at the grid position of each input's closest unit.

        Does nothing when ``som_m`` is ``None``.  ``title`` is not used.
        """
        data_x = []
        data_y = []
        for v in self._as_rows(idata):
            position = np.argmin(self._unit_distances(v))
            data_x.append(position % self.n)
            data_y.append(position // self.n)

        if som_m is not None:
            som_m.add_trace(go.Scatter(x=data_x, y=data_y, mode="markers", marker_color='rgba(255, 255, 255, 0.8)',))

    def time_series(self, som_m=None, idata=[], wsize=50, title=""):
        """Show the sequence of closest units next to their positions on the map.

        Builds a scatter of (input index, closest-unit index) whose marker
        opacity is the input's distance to that unit divided by the largest
        such distance, and adds the slightly jittered grid positions of the
        closest units to ``som_m``.

        Args:
            som_m: map figure to draw on; required (``add_trace`` is called on
                it unconditionally).
            idata: input vectors in sequence order, one per row.
            wsize: upper bound of the initial x-axis range of the series plot.
            title: not used.

        Returns:
            An ``HBox`` holding the map figure and the series figure.
        """
        rows = self._as_rows(idata)
        data_x = list(range(len(rows)))

        # (closest unit, distance to it) per input, plus the largest distance.
        qmax = 0
        ps = []
        for v in rows:
            dists = self._unit_distances(v)
            position = np.argmin(dists)
            qerror = dists[position]
            if qmax < qerror:
                qmax = qerror
            ps.append((position, qerror))

        data_y = []
        data_x2 = []
        data_y2 = []
        markerc = []
        for position, qerror in ps:
            data_y.append(position)
            # qmax stays 0 when every input coincides with a weight vector.
            rez = qerror / qmax if qmax > 0 else 0.0
            markerc.append('rgba(0, 0, 0, ' + str(rez) + ')')

            # x is the column (0 .. n-1), y the row (0 .. m-1).
            x, y = position % self.n, position // self.n
            if x == 0:
                y = np.random.uniform(low=y, high=y + .1)
            elif x == self.n - 1:
                y = np.random.uniform(low=y - .1, high=y)
            elif y == 0:
                x = np.random.uniform(low=x, high=x + .1)
            elif y == self.m - 1:
                x = np.random.uniform(low=x - .1, high=x)
            else:
                x, y = np.random.uniform(low=x - .1, high=x + .1), np.random.uniform(low=y - .1, high=y + .1)

            data_x2.append(x)
            data_y2.append(y)

        ts_plot = go.FigureWidget(go.Scatter(x=[], y=[], mode="markers", marker_color=markerc, marker=dict(colorscale='Viridis', showscale=True, color=np.random.randn(500))))
        ts_plot.update_xaxes(range=[0, wsize])

        ts_plot.data[0].x, ts_plot.data[0].y = data_x, data_y
        som_m.add_trace(go.Scatter(x=data_x2, y=data_y2, mode="markers",))

        som_m.layout.height = 500
        ts_plot.layout.height = 500
        som_m.layout.width = 500
        ts_plot.layout.width = 1300

        return HBox([go.FigureWidget(som_m), go.FigureWidget(ts_plot)])

    def plot(self, matrix, color="Viridis", interp="best", title=""):
        """Return a 700 x 700 heatmap ``FigureWidget`` of ``matrix``.

        ``interp`` is passed as the heatmap's ``zsmooth`` and ``color`` as its
        ``colorscale``; the colour bar is hidden and the title centred.
        """
        return go.FigureWidget(go.Heatmap(z=matrix, zsmooth=interp, showscale=False, colorscale=color), layout=go.Layout(width=700, height=700, title=title, title_x=0.5,))


In [152]:
import pandas as pd
import minisom as som
from sklearn import datasets, preprocessing
#interp: False, 'best', 'fast', 
#color = 'viridis': https://plotly.com/python/builtin-colorscales/



#############################
######## miniSOM ############
#############################
m = 10
n = 10

# Pre-processing: scale every feature to [0, 1]
iris = datasets.load_iris().data
min_max_scaler = preprocessing.MinMaxScaler()
iris = min_max_scaler.fit_transform(iris)

# Train (fixed seed so that Restart & Run All reproduces the same map)
s = som.MiniSom(m, n, iris.shape[1], sigma=0.8, learning_rate=0.7, random_seed=42)
s.train_random(iris, 10000, verbose=False)

# Visualization
# get_weights() is MiniSom's public accessor for the weight array; flatten the
# grid to one row per unit, with the vector length taken from the data.
viz_miniSOM = SomViz(s.get_weights().reshape(-1, iris.shape[1]), m, n)
um1 = viz_miniSOM.umatrix(color='magma', interp='best', title='U-matrix miniSOM')


##########################################
######## read from SOMToolBox ############
##########################################
trainedmap = SOMToolBox_Parse('datasets/iris.vec')
idata, idim, idata_x, idata_y = trainedmap.read_weight_file()

# Parse in one step so that `smap` only ever names the weight DataFrame.
smap, sdim, smap_x, smap_y = SOMToolBox_Parse('datasets/iris.wgt.gz').read_weight_file()

# Visualization
viz_SOMToolBox = SomViz(smap.values.reshape(-1, sdim), smap_y, smap_x)
um2 = viz_SOMToolBox.umatrix(color='viridis', interp=False, title='U-matrix SOMToolBox')

display(HBox([um1, um2]))

HBox(children=(FigureWidget({
    'data': [{'colorscale': [[0.0, '#000004'], [0.1111111111111111, '#180f3d'],
…

In [202]:
# Parse in one step so that `data` / `smap` only ever name the resulting DataFrames.
data, _, _, _ = SOMToolBox_Parse('datasets/iris.vec').read_weight_file()
smap, sdim, smap_x, smap_y = SOMToolBox_Parse('datasets/iris.wgt.gz').read_weight_file()

# Visualization
viz = SomViz(smap.values.reshape(-1, sdim), smap_y, smap_x)
um = viz.umatrix(color='reds', interp=None, title='U-matrix SOMToolBox')
pm = viz.pmatrix(color='reds', idata=data, perc=18, interp=None, title='P-matrix SOMToolBox')
# usm = viz.u_star_matrix(color='reds', idata=data, perc=18, interp=None, title='U*-matrix SOMToolBox')
# The figures get their own names: rebinding `qe` / `mqe` here would replace the
# module-level functions of the same name, and re-running the SomViz class cell
# would then bind figures instead of functions.
qe_fig = viz.qe(color='reds', idata=data, interp=None, title='qe SOMToolBox')
mqe_fig = viz.mqe(color='reds', idata=data, interp=None, title='mqe SOMToolBox')
display(HBox([um, pm]))
#display(HBox([qe_fig, mqe_fig]))

HBox(children=(FigureWidget({
    'data': [{'colorscale': [[0.0, 'rgb(255,245,240)'], [0.125,
                …