In [None]:
#%matplotlib inline

In [4]:
import numpy as np
import matplotlib.pylab as plb

In [5]:
def som_step(centers,data,neighbor,eta,sigma):
    """Perform one sequential (online) learning step of a self-organized map.

    The unit whose center has the smallest squared Euclidean distance to
    ``data`` is the winner. Every center is then moved toward ``data`` by
    ``eta`` times a Gaussian factor of its grid distance to the winner.

    NOTE: ``centers`` is modified in place and is NOT returned.

      update_norms = som_step(centers,data,neighbor,eta,sigma)

      Arguments:
       centers  (matrix) cluster centres, one row per unit
                         (number of units X data dimension). Updated in place.
       data     (vector) the datapoint presented in this timestep.
       neighbor (matrix) layout of the map: the entry at a grid position is
                         the row index in ``centers`` of the unit located
                         there. Must contain each of 0 .. len(centers)-1
                         exactly once; the grid need not be square.
       eta      (scalar) learning rate.
       sigma    (scalar) width (standard deviation, in grid units) of the
                         gaussian neighborhood function.

      Returns:
       list with one entry per unit: the squared Euclidean norm of the update
       that was applied to that unit's center in this step.

      Raises:
       ValueError if ``neighbor`` does not contain every unit index exactly
       once.
    """
    neighbor = np.asarray(neighbor)
    sample = np.ravel(data)
    n_units = len(centers)

    # Position j of the argsort is the flat grid position holding value j,
    # i.e. the location of unit j -- provided neighbor is a permutation of
    # 0 .. n_units-1, which is checked right below.
    flat_pos = np.argsort(neighbor, axis=None)
    if neighbor.size != n_units or not np.array_equal(
            neighbor.ravel()[flat_pos], np.arange(n_units)):
        raise ValueError(
            "neighbor must contain each unit index 0..%d exactly once"
            % (n_units - 1))
    # coords[j] = grid coordinates of unit j
    coords = np.stack(np.unravel_index(flat_pos, neighbor.shape), axis=1)

    # find the best matching unit via the minimal distance to the datapoint
    winner = np.argmin(np.sum((centers - sample)**2, axis=1))

    # grid distance of every unit to the winner and the resulting
    # discounting factor (1 for the winner itself)
    grid_dist = np.sqrt(np.sum((coords - coords[winner])**2, axis=1))
    disc = gauss(grid_dist, [0, sigma])

    # Each unit's update only depends on its own center, so all units can be
    # updated at once; the result equals the unit-by-unit loop.
    updates = (disc * eta)[:, np.newaxis] * (sample - centers)
    centers += updates  # in place: the caller's array is modified

    return list(np.sum(updates**2, axis=1))


def gauss(x,p):
    """Evaluate an unnormalized Gaussian bump at x.

    p[0] is the mean and p[1] the standard deviation. The peak value is 1,
    i.e. the function returns 1 at x == p[0].
    """
    mean = p[0]
    std = p[1]
    deviation = x - mean
    return np.exp((-deviation**2) / (2 * std**2))

def name2digits(name):
    """ takes a string NAME and converts it into a pseudo-random selection of
     digits: one row of the table ``x`` stored in the file 'hash.mat'
     (read from the current working directory), sorted ascending.

     The row is chosen by a hash of the lower-cased name; only the first 25
     characters contribute (one prime number per character).

     Example (from the original exercise, depends on the shipped hash.mat):
     name2digits('Felipe Gerhard')
     returns: [0 4 5 7]
     """
    # Local import, as in the original: scipy is only needed by this helper.
    import scipy.io.matlab

    primenumbers = [2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97]

    # zip() below stops at the shorter sequence, but truncate explicitly to
    # make the 25-character limit visible.
    name = name.lower()[:len(primenumbers)]

    # Hash: sum over characters of prime_i * ord(char_i) * 2**(i+1).
    # Kept in floating point so the value matches the original computation.
    s = 0.0
    for i, (prime, char) in enumerate(zip(primenumbers, name)):
        s += prime * ord(char) * 2.0**(i + 1)

    Data = scipy.io.matlab.loadmat('hash.mat',struct_as_record=True)
    x = Data['x']

    # np.mod of a float is a float; NumPy rejects non-integer indices with
    # an IndexError, so the row index must be converted explicitly.
    t = int(np.mod(s, x.shape[0]))

    return np.sort(x[t,:])

In [6]:
# Each sample is a flattened 28 x 28 image with pixel values up to 255.
dim = 28 * 28
data_range = 255.0

# Read the samples (one per row) and the digit label of every sample.
data = np.array(np.loadtxt('data.txt'))
labels = np.loadtxt('labels.txt')

# Derive the digits to work with from the name below.
name = 'Bertrand Champenois' # REPLACE BY YOUR OWN NAME
targetdigits = name2digits(name)
print(targetdigits)

# Boolean mask: True for every sample whose label is one of the target digits.
ii = np.logical_or.reduce([labels == digit for digit in targetdigits])

# Keep only the selected samples and their labels.
data, labels = data[ii, :], labels[ii]
dy, dx = data.shape

[2 3 5 7]




In [22]:
#fix the seed of NumPy's global random generator so that the random center
#initialisation and the presentation order below are identical on every
#"Restart & Run All"; change the value to obtain a different run
seed = 0
np.random.seed(seed)

#set the size of the Kohonen map. In this case it will be 6 X 6
size_k = 6

#set the width of the neighborhood via the width of the gaussian that
#describes it
sigma = 1

#initialise the centers randomly, uniformly in [0, data_range)
centers = np.random.rand(size_k**2, dim) * data_range

#build a neighborhood matrix: entry (r, c) is the index of the unit that
#sits at grid position (r, c)
neighbor = np.arange(size_k**2).reshape((size_k, size_k))

#set the learning rate
eta = 0.01 # HERE YOU HAVE TO SET YOUR OWN LEARNING RATE

#set the maximal iteration count
tmax = 8*2000 # this might or might not work; use your own convergence criterion

#set the random order in which the datapoints should be presented:
#cycle through the dy sample indices until tmax entries exist, then shuffle
i_random = np.arange(tmax) % dy
np.random.shuffle(i_random)

In [23]:
def score(centers, data):
    """Return the quantization error of the map on ``data``.

    For every row of ``data`` the squared Euclidean distance to its closest
    center is computed; the function returns the sum of these distances.

    All sizes are taken from the arguments: every row of ``data`` is used
    (the notebook's dataset happens to have the 2000 rows that used to be
    hard-coded here), and neither the map size nor the sample dimension is
    assumed.

    Arguments:
     centers  (matrix) cluster centres, one row per unit.
     data     (matrix) samples, one row per sample, same dimension as centers.

    Returns:
     scalar, the summed squared distance; 0 if ``data`` has no rows.
    """
    centers = np.asarray(centers)
    total = 0
    for sample in np.asarray(data):
        # squared distance of this sample to every center; the smallest one
        # is the distance to its best matching unit
        total += np.min(np.sum((centers - np.ravel(sample))**2, axis=1))
    return total

In [24]:
# Quantization error of the map, sampled every `eval_every` training steps.
eval_every = 50
accumuls = []

for t, i in enumerate(i_random):
    # one online SOM update with sample i (centers is modified in place);
    # aa receives som_step's return value, the per-unit squared update norms
    aa = som_step(centers, data[i,:],neighbor,eta,sigma)
    if t % eval_every == 0:
        accumuls.append(score(centers, data))
    if t%500==0:
        # progress in percent of tmax
        print(t/tmax*100)

# Error curve, with the x axis expressed in training steps.
plb.figure()
plb.plot(np.arange(len(accumuls)) * eval_every, accumuls)
plb.xlabel('training step')
plb.ylabel('summed squared distance to best matching unit')
plb.title('SOM quantization error during training')
plb.show()

# Learned prototypes: one 28 x 28 image per map unit, arranged like the map.
# An explicit new figure keeps the grid off the error plot when show() does
# not block / close the previous figure.
plb.figure()
for j in range(size_k ** 2):
    plb.subplot(size_k, size_k, j + 1)
    plb.imshow(np.reshape(centers[j, :], [28, 28]), interpolation='nearest', cmap='Greys')
    plb.axis('off')
plb.show()

0.0
3.125
6.25
9.375
12.5
15.625
18.75
21.875
25.0
28.125
31.25
34.375
37.5
40.625
43.75
46.875
50.0
53.125
56.25
59.375
62.5
65.625
68.75
71.875
75.0
78.125
81.25
84.375
87.5
90.625
93.75
96.875


In [31]:
def predictNeurons(centers, data, labels, targetdigits):
    """Assign a digit to every map unit by majority vote.

    Each sample votes, with its label, for the unit whose center is closest
    to it (squared Euclidean distance). A unit is assigned the target digit
    that collected the most votes on it; ties go to the digit that comes
    first in ``targetdigits``.

    All sizes are taken from the arguments: every (sample, label) pair is
    used and any number of target digits is supported.

    Arguments:
     centers      (matrix) cluster centres, one row per unit.
     data         (matrix) samples, one row per sample.
     labels       (vector) label of every sample.
     targetdigits (vector) the candidate digits (must not be empty).

    Returns:
     float vector with one digit per unit. A unit that wins no sample gets
     ``targetdigits[0]`` (argmax of an all-zero vote row), exactly as before,
     so such an entry does not mean the unit represents that digit.
    """
    centers = np.asarray(centers)
    targetdigits = np.asarray(targetdigits)

    # votes[u, d] = number of samples labelled targetdigits[d] won by unit u
    votes = np.zeros((len(centers), len(targetdigits)))
    for sample, label in zip(np.asarray(data), labels):
        winner = np.argmin(np.sum((centers - np.ravel(sample))**2, axis=1))
        # a label that is not among the target digits matches no column and
        # therefore casts no vote
        votes[winner, targetdigits == label] += 1

    return targetdigits[np.argmax(votes, axis=1)].astype(float)

In [32]:
# Show, for every map unit, the digit that predictNeurons assigns to it.
predictNeurons(centers, data, labels, targetdigits)


array([ 7.,  7.,  5.,  5.,  5.,  3.,  7.,  7.,  5.,  3.,  3.,  3.,  7.,
        7.,  7.,  3.,  3.,  3.,  7.,  7.,  2.,  2.,  3.,  3.,  5.,  2.,
        2.,  2.,  2.,  2.,  5.,  2.,  2.,  2.,  2.,  2.])

In [20]:
# Render the sample at index 1 of `data` as a 28 x 28 image.
plb.imshow(data[1].reshape(28, 28))
plb.show()

In [55]:
# Display the label stored at index 1 of `labels`.
labels[1]

2.0

In [56]:
# Display the map layout: the entry at (r, c) is the index of the unit at that grid position.
neighbor

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])