# 5c4b3e2 Aug 2, 2017
# 1 contributor
#
# Users who have contributed to this file
#
# 63 lines (47 sloc) 1.79 KB
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from neupy import algorithms, environment

# NOTE(review): in the damaged source this statement was fused into the
# import line above. `environment` is imported but otherwise unused, so
# the seeding call below is presumed -- confirm against the upstream
# example before relying on reproducible results.
environment.reproducible()

# The plot colours below are read from the active style's colour cycle,
# so the style has to be selected before the script body runs.
plt.style.use('ggplot')


if __name__ == '__main__':
    # Collect the colours of the current style so that every iris class
    # (and the cluster centers) can be drawn in a distinct colour.
    ggplot_colors = plt.rcParams['axes.prop_cycle']
    colors = np.array([c['color'] for c in ggplot_colors])

    dataset = datasets.load_iris()
    # Use only two features (columns 2 and 3) in order
    # to make visualization simpler
    data = dataset.data[:, [2, 3]]
    target = dataset.target

    sofm = algorithms.SOFM(
        # Use only two features for the input
        n_inputs=2,

        # Number of outputs defines number of features
        # in the SOFM or, in terms of clustering, the number
        # of clusters
        n_outputs=3,

        # In a clustering application we prefer that
        # clusters are updated independently from each
        # other. For this reason we set the learning radius
        # equal to zero
        learning_radius=0,

        # Training step size or learning rate
        # NOTE(review): the original value was lost from the source;
        # 0.25 is believed to match the upstream example -- confirm.
        step=0.25,

        # Shuffles dataset before every training epoch.
        shuffle_data=True,

        # Instead of generating random weights
        # (features / cluster centers) SOFM will sample
        # them from the data. This means that after the
        # initialization step 3 random data samples will
        # become cluster centers
        weight='sample_from_data',

        # Shows training progress in terminal
        verbose=True,
    )
    sofm.train(data, epochs=200)

    plt.title('Clustering iris dataset with SOFM')
    plt.xlabel('Feature #3')
    plt.ylabel('Feature #4')

    # Samples are coloured by their true class label; `data.T` unpacks
    # into the x and y coordinate rows.
    plt.scatter(*data.T, c=colors[target], s=100, alpha=1)
    # The rows of `sofm.weight` are unpacked as the x and y coordinates
    # of the cluster centers, drawn larger and in a fourth colour.
    cluster_centers = plt.scatter(*sofm.weight, s=300, c=colors[3])

    plt.legend([cluster_centers], ['Cluster center'], loc='upper left')
    # NOTE(review): no plt.show() / savefig call is visible in this part
    # of the file; confirm the figure is displayed or saved further down.