In [None]:
# install minisom from jupyter notebook cell
# !pip install minisom

# OR 
# install from Terminal/Anaconda Prompt 
# (Don't forget to restart Anaconda Navigator)
# pip install minisom


# Source and documentation:
# https://github.com/JustGlowing/minisom/

In [None]:
from os.path import join
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.neighbors import KNeighborsClassifier

from minisom import MiniSom

In [None]:
## Import Matplotlib functions to create MiniSOM visualizations

from matplotlib.patches import RegularPolygon, Ellipse
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import cm, colorbar
from matplotlib import colors as mpl_colors
from matplotlib.colors import LinearSegmentedColormap

from matplotlib.lines import Line2D
import seaborn as sns

from matplotlib import __version__ as mplver

In [None]:
## Recommended version at least 3.7.0 or greater
print("matplotlib version is:" , mplver)

## Import preprocessed data

In [None]:
df = pd.read_csv(join('..', 'data', 'data_preprocessed.csv'))

In [None]:
df.head()

In [None]:
df.columns

In [None]:
# Split the column names of `df` into feature groups.
# Metric features that were kept for the analysis.
metric_features = [
    'income',
    'frq',
    'rcn',
    'clothes',
    'kitchen',
    'small_appliances',
    'toys',
    'house_keeping',
    'per_net_purchase',
    'spent_online',
]

# Columns whose name starts with 'oh_' (presumably the one-hot encoded categoricals).
non_metric_features = df.columns[df.columns.str.startswith('oh_')].tolist()
# Columns whose name starts with 'PC' (presumably principal components).
pc_features = df.columns[df.columns.str.startswith('PC')].tolist()

# Every column that belongs to none of the groups above.
# The lookup set is built once instead of re-concatenating the lists for each column.
used_features = set(metric_features + non_metric_features + pc_features)
unused_features = [col for col in df.columns if col not in used_features]

In [None]:
# Show each feature group under its heading (headings after the first start on a blank line).
for heading, names in (
    ('metric_features:', metric_features),
    ('\nnon_metric_features:', non_metric_features),
    ('\nunused_features:', unused_features),
    ('\npc_features:', pc_features),
):
    print(heading, names)


---

## Self-organizing maps
What is a SOM? How does it work? What is it used for?

The SOM objective is to adjust the units to the data in the input space, so that the
network is (as best as possible) representative of the training dataset.

### How is it computed?
### Important concepts:
- Units and observations
- BMU
- Neighborhood function
- Input and Output space

**Video:**

(May be a good idea to mute the video before playing)

https://www.youtube.com/watch?v=k7DK5fnJH94

https://www.youtube.com/watch?v=zyYZuAQZWTM



### Characteristics:
- Grid shape needs to be set a priori
- Results depend on the initialization (even though it can be quite robust to it)
- Fitting a SOM can be computationally expensive
- Capable of finding the global optimum (theoretically - if the LR -> 0)
- Visualization tool for high-dimensional data

### Additional analyses/tutorials
- [Air Flights](https://github.com/sevamoo/SOMPY/blob/master/sompy/examples/AirFlights_hexagonal_grid.ipynb)
- [Visualizations on toy datasets](https://gist.github.com/sevamoo/035c56e7428318dd3065013625f12a11)

### Basic Example

Based on https://github.com/JustGlowing/minisom/blob/master/examples/ColorSpaceMapping.ipynb

In [None]:
# Some helper functions
def tidy_ax(ax, major_ticks, minor_ticks, minor_lim=None):
    """Style an axes as an unlabeled grid with white grid lines.

    Parameters
    ----------
    ax : matplotlib Axes (or compatible object)
        Axes to modify in place.
    major_ticks, minor_ticks : numpy arrays
        Tick positions. Every position is shifted by -0.5 before being applied
        (presumably so grid lines fall on the cell borders of an ``imshow`` image).
    minor_lim : number, optional
        Exclusive upper bound applied to BOTH the major and the minor ticks of
        the y axis. Defaults to ``major_ticks.max()``. The x axis always uses
        all ticks.

    Returns
    -------
    The same ``ax`` object, for chaining.
    """
    # x axis: use every tick position
    ax.set_xticks(major_ticks - .5)
    ax.set_xticks(minor_ticks - .5, minor=True)

    # No tick labels, only the grid
    ax.set_xticklabels([])
    ax.set_yticklabels([])

    ax.grid(which='both', alpha=0.5, color='white', linewidth=1)

    # Hide the tick marks themselves
    ax.tick_params(axis='both', which='both', direction="in", width=0, length=0)

    # Identity check: `== None` would be evaluated element-wise for array-like limits
    if minor_lim is None:
        minor_lim = major_ticks.max()

    # y axis: keep only the ticks strictly below the limit
    ax.set_yticks(major_ticks[major_ticks < minor_lim] - .5)
    ax.set_yticks(minor_ticks[minor_ticks < minor_lim] - .5, minor=True)

    ax.set_aspect(1)

    return ax

In [None]:
# Seeded numpy random generator so the colours are reproducible
rng = np.random.default_rng(638468314)

# 100 random RGB triples in [0, 1], rounded to two decimals
random_colors = np.round(rng.uniform(0, 1, (100, 3)), 2)

# Same values as a frame, with the row number exposed as an 'index' column
random_colors_df = pd.DataFrame(random_colors, columns=["R", "G", "B"]).reset_index()

# Preview the generated colours
sns.color_palette(random_colors)



In [None]:
# Create the (untrained) SOM used for the RGB demo

colors_dims = (30, 40)

som_colors = MiniSom(
    colors_dims[0], colors_dims[1],     # Size of SOM grid
    3,                                  # Number of features (R, G, B)
    sigma=3.,                           # Neighborhood radius
    learning_rate=2.5,                  # Learning rate
    random_seed=42,                     # Random seed
    neighborhood_function='gaussian',   # Neighborhood function
)


In [None]:

# Figure layout: a thin strip on top ('two') with the input colours, and below it
# the SOM weights before ('left') and after ('right') training.
# NOTE: this cell also trains `som_colors` (see below), so it is not idempotent:
# re-running it without re-creating `som_colors` continues training and the
# 'left' panel no longer shows the initial weights.
fig, axes = plt.subplot_mosaic([
                            ['two', 'two'],
                            ['left', 'right'],
                            ],
                               figsize=(16,8), 
                         height_ratios=[1,19],
                         constrained_layout=True)

################################
## Plot random color (input data)
################################

ax = axes['two']

# y=0 is a constant (not a column name), so all points sit on one horizontal line;
# each point is coloured with its own RGB value through the palette.
sns.scatterplot(random_colors_df, x='index', y=0, 
                palette=random_colors.tolist(), hue='index', 
                ax=ax, legend=False, s=100, edgecolor='white')
ax.set_xticks([])
ax.set_yticks([])
ax.margins(0.01)
ax.set_xlabel("")
ax.set_title("Original Random Color Values")

################################
## Plot random initialized weights
################################


# Tick positions for the grid; the y axis is cut at 30 in tidy_ax below
major_ticks = np.arange(0, 41, 5)
minor_ticks = np.arange(0, 41, 1)

ax = axes['left']

# Weights are shown as an RGB image; abs() presumably keeps the initial weights
# non-negative for display -- TODO confirm against MiniSom's initialisation.
ax.imshow(abs(som_colors.get_weights()), interpolation='none', origin="lower", alpha=.75)

ax = tidy_ax(ax, major_ticks, minor_ticks, 30)

ax.set_title("SOM Random Weights")




################################
## Plot trained SOM
################################

# Training mutates `som_colors` in place (500 iterations, samples in random order)
som_colors.train(random_colors, 500, random_order=True, verbose=True)

ax = axes['right']

ax.imshow(abs(som_colors.get_weights()), interpolation='none', origin="lower", alpha=.75)

# Overlay every input colour on its winning unit; winner() returns the grid
# coordinates, which are swapped here to (x, y) for scatter.
for i in random_colors:
    yx = som_colors.winner(i)

    ax.scatter(yx[1], yx[0], c=[i], edgecolors='white', alpha=1, s=100, linewidth=2)
    


ax = tidy_ax(ax, major_ticks, minor_ticks, 30)
ax.set_title("SOM After Training")
# The target directory must already exist; savefig is not expected to create it.
fig.savefig("./../figures/clustering/som_rgb.png")

plt.show()

In [None]:

# Six-panel comparison of how training length, sigma and learning rate change
# the colour SOM. The six create/train/plot stanzas are expressed once through
# two small helpers.
fig, axes = plt.subplots(2, 3, figsize=(14, 8),
                         gridspec_kw={'hspace': .15},
                         constrained_layout=True,
                         dpi=120)

colors_dims = (30, 40)
major_ticks = np.arange(0, 41, 5)
minor_ticks = np.arange(0, 41, 1)


def _new_color_som(sigma=3., learning_rate=2.5):
    """Return an untrained colour SOM (fixed seed, gaussian neighborhood)."""
    return MiniSom(*colors_dims,                     # Size of SOM grid
                   3,                                # Number of features
                   sigma=sigma,                      # Neighborhood radius
                   learning_rate=learning_rate,      # Learning rate
                   random_seed=42,                   # Random seed
                   neighborhood_function='gaussian'  # Neighborhood function
                   )


def _draw_color_som(ax, som, title, alpha=None):
    """Draw the SOM weights as an RGB image and put QE/TE in the title."""
    ax.imshow(abs(som.get_weights()), interpolation='none', origin="lower", alpha=alpha)
    tidy_ax(ax, major_ticks, minor_ticks, colors_dims[0])
    qe = np.round(som.quantization_error(random_colors), 3)
    te = np.round(som.topographic_error(random_colors), 3)
    ax.set_title("{}\nQE:{} TE:{}".format(title, qe, te))


################################
## Plot random initialized weights
################################

som_c = _new_color_som()
_draw_color_som(axes[0][0], som_c, "SOM Random Weights")

################################
## Plot trained SOMs
################################

# Second panel: the SOM shown above, now trained for 500 iterations
som_c.train(random_colors, 500, random_order=True, verbose=False)
_draw_color_som(axes[0][1], som_c, "SOM Trained 500 iters, sigma=3, LR=2.5", alpha=.75)

# Remaining panels: (axes, SOM overrides, iterations, extra train kwargs, title)
trained_panels = [
    (axes[0][2], {}, len(random_colors), {},
     "SOM Trained N iters, sigma=3, LR=2.5"),
    (axes[1][0], {'sigma': 1.}, 500, {},
     "SOM Trained 500 iters, sigma=1, LR=2.5"),
    (axes[1][1], {'learning_rate': 1.5}, 500, {},
     "SOM Trained 500 iters, sigma=3, LR=1.5"),
    (axes[1][2], {}, 2, {'use_epochs': True},
     "SOM Batch Trained 2 iters/sample, sigma=3, LR=2.5"),
]

for ax, som_kwargs, n_iter, train_kwargs, title in trained_panels:
    # A fresh SOM per panel so the panels differ only in the listed setting
    som_c = _new_color_som(**som_kwargs)
    som_c.train(random_colors, n_iter, random_order=True, verbose=False, **train_kwargs)
    _draw_color_som(ax, som_c, title, alpha=.75)

# fig.savefig("./../figures/clustering/som_demo.png")
plt.show()

## How to apply Self-Organizing Maps?

The training of a SOM is **more effective** if it is done in two phases: the unfolding phase, and the fine-tuning phase. 

In the **unfolding phase** the objective is to **spread the units** in the region of the input space where the data patterns are located. In this phase the neighbourhood function should have a large initial radius so that all units have high mobility and the map can quickly cover the input space.

The **fine tuning phase**, as the name implies, is the process of small adjustments in order to **reduce the quantization error**, and centre the units in the areas where the density of patterns is highest. Usually, in this phase the learning rate and the neighbourhood radius are smaller than the ones used in the unfolding phase. As these two parameters are smaller, the map will need more time to adjust its weights and that is why the number of iterations or epochs is normally higher.

**Exercise**
- Train a SOM with a 10x10 grid, random initialization, gaussian neighborhood function and hexagonal topology/lattice
- Set training of 100 iterations


Documentation of `train` method:

https://github.com/JustGlowing/minisom/blob/master/minisom.py#L467

In [None]:
# Exercise settings: 10x10 grid, gaussian neighborhood function, hexagonal lattice.
# (The previous placeholder values 0/None cannot build a SOM, which breaks
# every cell below that uses `sm`.)
M = 10
N = 10
neighborhood_function = 'gaussian'
topology = 'hexagonal'
n_feats = len(metric_features)
# Not fixed by the exercise; same value as the larger SOM trained later in this notebook.
learning_rate = .7


som_data = df[metric_features].values

sm = MiniSom(M, N,              # 10x10 map size
             n_feats,           # Number of the elements of the vectors in input.
             learning_rate=learning_rate, 
             topology=topology, 
             neighborhood_function=neighborhood_function, 
             activation_distance='euclidean',
             random_seed=42
             )

# Initializes the weights of the SOM picking random samples from data.
sm.random_weights_init(som_data) 


print("Before training:")
print("QE", np.round(sm.quantization_error(som_data),4))
print("TE", np.round(sm.topographic_error(som_data),4))



# Trains the SOM using all the vectors in data sequentially
# minisom does not distinguish between unfolding and fine tuning phase;

sm.train_batch(som_data, 20000)

print("After training:")
print("QE", np.round(sm.quantization_error(som_data),4))
print("TE", np.round(sm.topographic_error(som_data),4))



## Visualizing data with SOMs

### Component planes
What do they represent? What kinds of information do they contain?

Analyse these plots from the following perspectives:
- Feature importance
- Feature correlation (both globally and locally)
- Data distribution
- Outlier detection

In [None]:
# What are these weights?
# The array returned by the trained SOM. It is indexed below as
# weights[:, :, feature], i.e. one value per grid unit and per feature.
weights = sm.get_weights()
weights.shape

In [None]:
def plot_hexagons(som,              # Trained SOM model 
                  sf,               # matplotlib figure object
                  colornorm,        # colornorm
                  matrix_vals,      # 2-D matrix with one value per SOM unit
                  label="",         # title for figure
                  cmap=cm.Grays,    # colormap to use
                  annot=False       
                  ):
    """Draw one value per SOM unit as a coloured hexagon, with a colorbar.

    Parameters
    ----------
    som : trained SOM
        Only ``som.convert_map_to_euclidean((i, j))`` is used, to place unit (i, j).
    sf : matplotlib figure or sub-figure
        Container in which a single axes is created.
    colornorm : callable
        Normalisation applied to each value before the colormap lookup;
        also used for the colorbar.
    matrix_vals : 2-D array
        Values to display (e.g. one component plane, a U-matrix, a hit map
        or cluster labels), indexed as ``matrix_vals[i, j]``.
    label : str
        Axes title.
    cmap : colormap
        Colormap used for the hexagon faces and the colorbar.
    annot : bool
        If truthy, write each value (rounded to 2 decimals) inside its hexagon.

    Returns
    -------
    The ``sf`` object that was passed in.
    """
    axs = sf.subplots(1, 1)

    n_rows, n_cols = matrix_vals.shape[0], matrix_vals.shape[1]
    for i in range(n_rows):
        for j in range(n_cols):

            wx, wy = som.convert_map_to_euclidean((i, j))

            # Circumradius 1/sqrt(3) gives a flat-to-flat width of exactly 1.
            # (`hexagon`, not `hex`, to avoid shadowing the builtin.)
            hexagon = RegularPolygon((wx, wy),
                                     numVertices=6,
                                     radius=np.sqrt(1/3),
                                     facecolor=cmap(colornorm(matrix_vals[i, j])),
                                     alpha=1,
                                     edgecolor='white',
                                     linewidth=.5)
            axs.add_patch(hexagon)

            if annot:
                annot_val = np.round(matrix_vals[i, j], 2)
                # Show whole numbers without decimals; is_integer() is False
                # for NaN/inf, where int() would raise.
                if float(annot_val).is_integer():
                    annot_val = int(annot_val)
                axs.text(wx, wy, annot_val,
                         ha='center', va='center',
                         fontsize='x-small')

    ## Remove axes for hex plot
    axs.margins(.05)
    axs.set_aspect('equal')
    axs.axis("off")
    axs.set_title(label)

    ## Add colorbar in a narrow axes attached to the right of the plot
    divider = make_axes_locatable(axs)
    ax_cb = divider.append_axes("right", size="5%", pad="0%")

    ## Create a Mappable object (no data attached; it only carries cmap + norm)
    cmap_sm = plt.cm.ScalarMappable(cmap=cmap, norm=colornorm)
    cmap_sm.set_array([])

    ## Create custom colorbar 
    cb1 = colorbar.Colorbar(ax_cb,
                            orientation='vertical',
                            alpha=1,
                            mappable=cmap_sm
                            )
    cb1.ax.get_yaxis().labelpad = 6

    # Add colorbar to plot
    sf.add_axes(ax_cb)

    return sf




In [None]:
##############################
# Plot Component Planes
##############################

figsize = (10, 7)  # also reused by the figure cells further down
fig = plt.figure(figsize=figsize, constrained_layout=True, dpi=128)

# 3x4 grid of sub-figures; zip() stops after the last feature, so spare panels stay empty
subfigs = fig.subfigures(3, 4, wspace=.15)

for cpi, sf in zip(range(len(metric_features)), subfigs.flatten()):

    # Component plane of feature `cpi`: its weight on every unit of the grid
    matrix_vals = weights[:, :, cpi]

    # Each plane gets its own colour scale (min..max of that feature's weights)
    colornorm = mpl_colors.Normalize(vmin=np.min(matrix_vals), vmax=np.max(matrix_vals))
    # Alternative: symmetric scale centred on zero
    # colornorm = mpl_colors.CenteredNorm(vcenter=0, halfrange=np.max(np.abs(matrix_vals)))

    sf = plot_hexagons(sm, sf,
                       colornorm,
                       matrix_vals,
                       label=metric_features[cpi],
                       cmap=cm.coolwarm,
                       )

### U-matrix
Encode each neuron in the output space with the **average distance** to its neighbors in the input space.

Analyse these plots from the following perspectives:
- Clusters of units
- Potential outliers (units which are very distant from their neighbors and have low frequency)

In [None]:
# U-matrix of the trained SOM, using MiniSom's 'mean' scaling
umatrix = sm.distance_map(scaling='mean')

fig = plt.figure(figsize=figsize)

# Colour scale spans the observed range of the U-matrix
colornorm = mpl_colors.Normalize(vmin=umatrix.min(), vmax=umatrix.max())

fig = plot_hexagons(
    som=sm,
    sf=fig,
    colornorm=colornorm,
    matrix_vals=umatrix,
    label="U-matrix",
    cmap=cm.RdYlBu_r,
    annot=True,
)

In [None]:
## Reverse the second axis and transpose so the printed matrix matches the plot
umatrix_rounded = np.round(umatrix, 2)
print(umatrix_rounded[:, ::-1].T)

### Hit-map
Show the **frequency** of each Unit in the output map

In [None]:
# Number of observations mapped to each unit
hitsmatrix = sm.activation_response(df[metric_features].values)

fig = plt.figure(figsize=figsize)

# Colour scale starts at zero hits
colornorm = mpl_colors.Normalize(vmin=0, vmax=hitsmatrix.max())

fig = plot_hexagons(
    som=sm,
    sf=fig,
    colornorm=colornorm,
    matrix_vals=hitsmatrix,
    label="SOM Hits Map",
    cmap=cm.Greens,
    annot=True,
)


In [None]:
## Values of HITS:
## reverse the second axis and transpose so the matrix matches the plot
np.round(hitsmatrix, 2)[:, ::-1].T


In [None]:
sm.quantization_error(df[metric_features].values)

## Clustering with SOMs: K-means SOM vs Emergent SOM
- In **k-means SOM**, the *number of units should be equal to the expected number of clusters*, and thus each cluster should be represented by a single unit. 
- In **emergent SOM**, a very *large number of units is used*. These very large SOM allow for very clear U-Matrices and are useful for detecting quite clearly the underlying structure of the data. This technique can be **used together with other clustering algorithms**.

**Exercise**
- Train a SOM with a 50x50 grid, random initialization, gaussian neighborhood function and hexagonal topology/lattice
- Set an unfolding phase and a fine tuning phase of 100 iterations

In [None]:

# The exercise asks for a 50x50 grid, but that takes too long to run,
# so a smaller 20x30 grid is trained instead.
# M = 50
# N = 50
M = 20
N = 30
neighborhood_function = 'gaussian' 
topology = 'hexagonal' 
n_feats = len(metric_features)
learning_rate = .7


som_data = df[metric_features].values

sm = MiniSom(M, N,              # MxN map size (20x30)
             n_feats,           # Number of the elements of the vectors in input.
             learning_rate=learning_rate, 
             topology=topology, 
             neighborhood_function=neighborhood_function, 
             activation_distance='euclidean',
             random_seed=42
             )


# Initializes the weights of the SOM picking random samples from data.
sm.random_weights_init(som_data) 

print(np.round(sm.quantization_error(som_data),4), "Starting QE")
print(np.round(sm.topographic_error(som_data),4), "Starting TE")


# Trains the SOM using all the vectors in data sequentially
# minisom does not distinguish between unfolding and fine tuning phase;

sm.train_batch(som_data, 500000)
print(np.round(sm.quantization_error(som_data),4),"Ending QE")
print(np.round(sm.topographic_error(som_data),4),"Ending TE")


In [None]:
##############################
# Plot Component Planes
##############################

# Weights of the SOM trained in the previous cell
weights = sm.get_weights()

figsize = (10, 7)  # also reused by the figure cells further down
fig = plt.figure(figsize=figsize, constrained_layout=True, dpi=128)

# 3x4 grid of sub-figures; zip() stops after the last feature, so spare panels stay empty
subfigs = fig.subfigures(3, 4, wspace=.15)

for cpi, sf in zip(range(len(metric_features)), subfigs.flatten()):

    # Component plane of feature `cpi`: its weight on every unit of the grid
    matrix_vals = weights[:, :, cpi]

    # Each plane gets its own colour scale (min..max of that feature's weights)
    colornorm = mpl_colors.Normalize(vmin=np.min(matrix_vals), vmax=np.max(matrix_vals))

    sf = plot_hexagons(sm, sf,
                       colornorm,
                       matrix_vals,
                       label=metric_features[cpi],
                       cmap=cm.coolwarm,
                       )

In [None]:
# U-matrix of the larger SOM, using MiniSom's 'mean' scaling
umatrix = sm.distance_map(scaling='mean')

fig = plt.figure(figsize=figsize)

# Colour scale spans the observed range of the U-matrix
colornorm = mpl_colors.Normalize(vmin=umatrix.min(), vmax=umatrix.max())

fig = plot_hexagons(
    som=sm,
    sf=fig,
    colornorm=colornorm,
    matrix_vals=umatrix,
    label="U-matrix",
    cmap=cm.RdYlBu_r,
)

In [None]:
umatrix.shape

In [None]:
# Number of observations mapped to each unit of the larger SOM
hitsmatrix = sm.activation_response(df[metric_features].values)

fig = plt.figure(figsize=figsize)

# Colour scale starts at zero hits
colornorm = mpl_colors.Normalize(vmin=0, vmax=hitsmatrix.max())

fig = plot_hexagons(
    som=sm,
    sf=fig,
    colornorm=colornorm,
    matrix_vals=hitsmatrix,
    label="SOM Hits Map",
    cmap=cm.Greens,
)


Now we have a better idea of what the input space looks like in terms of distances.

### K-Means on top of SOM units
- Define number of clusters to retain

In [None]:
# Exercise:
# Do the Inertia plot here (check last class' notebook)

In [None]:
# Flatten the grid: one row per unit, one column per metric feature
n_units = M * N
weights_flat = sm.get_weights().reshape(n_units, len(metric_features))
weights_flat.shape

In [None]:
# Perform K-Means clustering on top of the MxN units
# (`weights_flat`, the flattened output of sm.get_weights(): one row per unit)
kmeans = KMeans(n_clusters=4, init='k-means++', n_init=20, random_state=42)
# NOTE: `nodeclus_labels` is overwritten later by the hierarchical clustering cell
nodeclus_labels = kmeans.fit_predict(weights_flat)
nodeclus_labels

In [None]:
# Put the per-unit cluster labels back on the MxN grid
kmeans_matrix = nodeclus_labels.reshape((M, N))

fig = plt.figure(figsize=figsize)

colornorm = mpl_colors.Normalize(vmin=0, vmax=kmeans_matrix.max())

fig = plot_hexagons(
    som=sm,
    sf=fig,
    colornorm=colornorm,
    matrix_vals=kmeans_matrix,
    label="SOM K-Means",
    cmap=cm.Spectral,
)


### Hierarchical Clustering on top of SOM units
- Define best linkage method
- Define number of clusters to retain

In [None]:
# Exercise:
# Do the R² plot here and the Dendrogram (check last class' notebook)

In [None]:
# Ward-linkage hierarchical clustering of the MxN units (4 clusters)

hierclust = AgglomerativeClustering(n_clusters=4, linkage='ward')
nodeclus_labels = hierclust.fit_predict(weights_flat)

# Per-unit labels arranged on the MxN grid
hclust_matrix = nodeclus_labels.reshape((M, N))

fig = plt.figure(figsize=figsize)

colornorm = mpl_colors.Normalize(vmin=0, vmax=hclust_matrix.max())

fig = plot_hexagons(
    som=sm,
    sf=fig,
    colornorm=colornorm,
    matrix_vals=hclust_matrix,
    label="SOM Hierarchical",
    cmap=cm.Spectral,
)

### Final SOM Clustering solution

In [None]:
# Check the nodes and their respective clusters
nodes = weights_flat

df_nodes = pd.DataFrame(nodes, columns=metric_features)
# Labels come from `hclust_matrix`, the matrix used for the final assignment below.
# `nodeclus_labels` is assigned by both the K-Means and the hierarchical cells,
# so its content depends on which of them ran last. ravel() flattens row by row,
# the same order as `weights_flat`.
df_nodes['label'] = hclust_matrix.ravel()
df_nodes

In [None]:
## Grid coordinates of the best-matching unit of every data point, e.g. (4,4)
samples = df[metric_features].values
bmu_index = np.array([sm.winner(sample) for sample in samples])

print(bmu_index.shape)

bmu_index

In [None]:

## Look up, for every data point, the cluster of its BMU in hclust_matrix:
## a row whose BMU is at (37, 28) receives the label stored in hclust_matrix[37, 28].

som_final_labels = [hclust_matrix[row][col] for row, col in bmu_index]


In [None]:
# Append the SOM cluster label to the original data as a 'label' column
label_series = pd.Series(som_final_labels, name='label', index=df.index)
df_final = pd.concat([df, label_series], axis=1)

df_final

In [None]:
# Characterize the final clusters by the mean of each metric feature
df_final.groupby('label')[metric_features].mean()

## Analysing the appropriateness of our solution
### R²

In [None]:
# using R²
def get_ss(df):
    """Return the total sum of squares of `df`, summed over all its columns.

    For each column the sample variance is multiplied by (non-null count - 1),
    which gives that column's sum of squared deviations from its mean.
    """
    per_column_ss = df.var() * (df.count() - 1)
    return np.sum(per_column_ss)

# Total sum of squares over the metric features
sst = get_ss(df_final[metric_features])
# Within-cluster sum of squares, one value per label. The metric columns are
# selected on the GroupBy so the grouping column is not handed to get_ss.
ssw_labels = df_final.groupby(by='label')[metric_features].apply(get_ss)
# Remember: SST = SSW + SSB
ssb = sst - np.sum(ssw_labels)
r2 = ssb / sst
r2

### Quantization error:
The quantization error is given by the average distance between a unit and the data points mapped to it i.e. all the input data points that share it as BMU.

$$q_e = \frac{\sum_{i=1}^{N_e}||x_i - w_e||}{N_e}$$

$$Q = \frac{\sum_{e=1}^{E}q_e}{E}$$
, where:

$Q$ is the overall quantization error of SOM,

$q_e$ is the quantization error for unit $e$,

$x_i$ is a data point/ observation,

$w_e$ is the unit $e$ representation in the input space,

$N_e$ is number of data points mapped to unit $e$,

$E$ is the number of units in the SOM grid

In [None]:
sm.quantization_error(som_data)

### Topographic error:

> 
> Returns the topographic error computed by finding
> the best-matching and second-best-matching neuron in the map
> for each input and then evaluating the positions.
>
> A sample for which these two nodes are not adjacent counts as
> an error. The topographic error is given by
> the total number of errors divided by the total number of samples.
> 
> If the topographic error is 0, no error occurred.
> If 1, the topology was not preserved for any of the samples
> 

https://github.com/JustGlowing/minisom/blob/master/minisom.py#L650


In [None]:
sm.topographic_error(som_data)
