In [2]:
## Original code from: Self-Organizing Maps by Paras Chopra and Jorge Trigueros
from random import *
from math import *

class Node:
    """One unit of the map: a grid position plus two randomly initialised vectors.

    Attributes:
        feature_size: Number of entries in ``feature_vector``.
        prediction_size: Number of entries in ``prediction_vector``.
        x, y: Location of the unit on the map grid.
        feature_vector: Weights compared against input features.
        prediction_vector: Weights returned as the unit's prediction.
    """

    def __init__(self, feature_size=10, prediction_size=10, x=0, y=0):
        """Store sizes and coordinates, then draw both vectors from ``random()``."""
        self.feature_size = feature_size
        self.prediction_size = prediction_size
        self.x = x
        self.y = y
        # Feature values are drawn first, prediction values second, so the
        # sequence of random() calls is deterministic for a given seed.
        self.feature_vector = [random() for _ in range(feature_size)]
        self.prediction_vector = [random() for _ in range(prediction_size)]


class SOM:
    """Self-organizing map whose units carry a feature and a prediction vector.

    Units are stored row-major in ``self.nodes``: the unit created with grid
    coordinates ``(i, j)`` lives at index ``i * width + j``.
    """

    # Let distance=False if you want to auto-calculate the distance
    def __init__(self, height=10, width=10, feature_size=10, prediction_size=10, distance=False, learning_rate=0.005):
        """Build a ``height`` x ``width`` grid of randomly initialised nodes.

        Args:
            height: Number of grid rows.
            width: Number of grid columns.
            feature_size: Length of every node's feature vector.
            prediction_size: Length of every node's prediction vector.
            distance: Initial neighbourhood radius. Any falsy value selects
                ``(height + width) / 2``.
            learning_rate: Initial learning rate used by ``train``.
        """
        self.height = height
        self.width = width
        # This instance attribute shadows the static method ``distance`` on
        # instances, which is why the method is always called as SOM.distance().
        self.distance = distance if distance else (height+width)/2
        self.total = height * width
        self.learning_rate = learning_rate
        self.feature_size = feature_size
        self.prediction_size = prediction_size
        # Row-major creation order: node (i, j) ends up at index i*width + j.
        self.nodes = [Node(feature_size, prediction_size, i, j)
                      for i in range(self.height)
                      for j in range(self.width)]

    # Train_vector format: [ [feature_vector[0], prediction_vector[0]],
    #                        [feature_vector[1], prediction_vector[1]], so on..
    def train(self, train_vector, iterations=1000):
        """Fit the map to ``train_vector`` for ``iterations`` passes.

        Every pass shrinks the neighbourhood radius and the learning rate by
        the same exponential factor, then, for each sample, moves all nodes
        within the current radius of the best matching unit towards the sample.
        Every tenth pass number is printed as progress.

        Args:
            train_vector: Sequence of ``[feature_vector, prediction_vector]``
                pairs.
            iterations: Number of passes over ``train_vector``.

        Raises:
            ZeroDivisionError: If ``self.distance`` is exactly 1, because
                ``log(1)`` is 0.
        """
        time_constant = iterations / log(self.distance)

        for i in range(1, iterations+1):
            if i % 10 == 0: print(i, end=', ')

            # Radius and learning rate share one decay factor; compute it once.
            decay = exp(-1.0 * i/time_constant)
            distance_decaying = self.distance * decay
            learning_rate_decaying = self.learning_rate * decay

            for sample in train_vector:
                input_feature = sample[0]
                input_prediction = sample[1]
                best_node = self.nodes[self.best_match(input_feature)]

                # New weights are collected first and written back afterwards,
                # so every node is updated from the pre-update state.
                pending = []
                for k in range(self.total):
                    node = self.nodes[k]
                    distance = SOM.distance(best_node, node)

                    if distance < distance_decaying:
                        # NOTE(review): the textbook Gaussian neighbourhood
                        # divides by 2*sigma**2; this divides by 2*sigma*i.
                        # Kept as in the original code -- confirm intent.
                        influence = exp((-1.0*(distance**2)) / (2*distance_decaying*i))
                        step = influence * learning_rate_decaying

                        new_feature = [
                            node.feature_vector[l] + step * (input_feature[l] - node.feature_vector[l])
                            for l in range(self.feature_size)
                        ]
                        new_prediction = [
                            node.prediction_vector[l] + step * (input_prediction[l] - node.prediction_vector[l])
                            for l in range(self.prediction_size)
                        ]
                        pending.append((node, new_feature, new_prediction))

                for node, new_feature, new_prediction in pending:
                    node.feature_vector[:] = new_feature
                    node.prediction_vector[:] = new_prediction

    # Returns prediction vector
    def predict(self, input_vector, return_coors=False):
        """Return the prediction vector of the node closest to ``input_vector``.

        Args:
            input_vector: Feature vector to look up.
            return_coors: When true, return ``(prediction_vector, x, y)``
                instead of the prediction vector alone.
        """
        best_node = self.nodes[self.best_match(input_vector)]

        if return_coors:
            return best_node.prediction_vector, best_node.x, best_node.y

        return best_node.prediction_vector

    # Returns best matching unit's index
    def best_match(self, target_feature):
        """Return the index of the node whose feature vector is nearest.

        The search starts from an infinite distance, so the nearest node is
        found however far the input lies from the map (for example with
        standardised inputs that fall outside [0, 1]). Ties keep the lowest
        index. With no nodes at all, 0 is returned.
        """
        minimum = float('inf')                          # Minimum distance
        minimum_index = 0                               # Minimum distance unit

        for i in range(self.total):
            candidate = self.feature_distance(self.nodes[i].feature_vector, target_feature)
            if candidate < minimum:
                minimum = candidate
                minimum_index = i

        return minimum_index

    def feature_distance(self, feature1, feature2):
        """Euclidean distance over the first ``self.feature_size`` entries."""
        squared = 0.0
        for j in range(self.feature_size):
            squared += (feature1[j]-feature2[j])**2
        return sqrt(squared)

    @staticmethod
    def distance(n1: "Node", n2: "Node"):
        """Euclidean distance between the grid positions of two nodes."""
        return sqrt((n1.x - n2.x)**2 + (n1.y - n2.y)**2)


# Preparación de los datos y la red

In [3]:
import pandas as pd

# Map dimensions and vector sizes handed to the SOM constructor.
HEIGHT = 30
WIDTH = 30
FEATURE_SIZE = 10
PREDICTION_SIZE = 1

# Only the 2013 file is loaded here.
raw_data = pd.read_csv('2013' + '.csv')

# Every column from the third onward is kept; each one becomes a training series.
countries = raw_data.columns[2:]
country_data = raw_data[countries]

# Standardise each column to zero mean and unit standard deviation.
data = (country_data - country_data.mean()) / country_data.std()

kohonen_network = SOM(HEIGHT, WIDTH, FEATURE_SIZE, PREDICTION_SIZE, distance=False, learning_rate=0.05)

# Each sample pairs a column's values with that column's position as its label.
training_set = []
for index, country in enumerate(countries):
    training_set.append([data[country].to_numpy(), [index]])

# Entrenamiento

In [4]:
# Number of passes over the training set.
ITERATIONS = 2000

# train() prints every tenth pass number as progress.
kohonen_network.train(training_set, iterations=ITERATIONS)

10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450, 460, 470, 480, 490, 500, 510, 520, 530, 540, 550, 560, 570, 580, 590, 600, 610, 620, 630, 640, 650, 660, 670, 680, 690, 700, 710, 720, 730, 740, 750, 760, 770, 780, 790, 800, 810, 820, 830, 840, 850, 860, 870, 880, 890, 900, 910, 920, 930, 940, 950, 960, 970, 980, 990, 1000, 1010, 1020, 1030, 1040, 1050, 1060, 1070, 1080, 1090, 1100, 1110, 1120, 1130, 1140, 1150, 1160, 1170, 1180, 1190, 1200, 1210, 1220, 1230, 1240, 1250, 1260, 1270, 1280, 1290, 1300, 1310, 1320, 1330, 1340, 1350, 1360, 1370, 1380, 1390, 1400, 1410, 1420, 1430, 1440, 1450, 1460, 1470, 1480, 1490, 1500, 1510, 1520, 1530, 1540, 1550, 1560, 1570, 1580, 1590, 1600, 1610, 1620, 1630, 1640, 1650, 1660, 1670, 1680, 1690, 1700, 1710, 1720, 1730, 1740, 1750, 1760, 1770, 1780, 1790, 1800, 1810, 1820, 1830, 1840, 1850

# Pruebas

In [5]:
# Count how many columns the trained map assigns back to their own label.
count = 0
results = pd.DataFrame()

for index, country in enumerate(countries):
    prediction = kohonen_network.predict(data[country].to_numpy(), True)
    # The first prediction entry, rounded, is read as a position in `countries`.
    predicted_country = countries[round(prediction[0][0])]
    print(f'Input: {country}      Output: {predicted_country}  Coors: {prediction[1:]}')
    if country == predicted_country:
        count += 1

print(f'Número de aciertos: {count} de {len(countries)}')

Input: United States [USA]      Output: United States [USA]  Coors: (3, 7)
Input: United Kingdom [GBR]      Output: Vietnam [VNM]  Coors: (1, 10)
Input: Vietnam [VNM]      Output: Switzerland [CHE]  Coors: (9, 1)
Input: Switzerland [CHE]      Output: Switzerland [CHE]  Coors: (0, 14)
Input: Spain [ESP]      Output: Spain [ESP]  Coors: (11, 14)
Input: Qatar [QAT]      Output: Peru [PER]  Coors: (0, 1)
Input: Portugal [PRT]      Output: Portugal [PRT]  Coors: (12, 10)
Input: Peru [PER]      Output: Panama [PAN]  Coors: (5, 2)
Input: Panama [PAN]      Output: Norway [NOR]  Coors: (12, 6)
Input: Norway [NOR]      Output: Nicaragua [NIC]  Coors: (1, 0)
Input: New Zealand [NZL]      Output: Netherlands [NLD]  Coors: (0, 8)
Input: Nicaragua [NIC]      Output: Netherlands [NLD]  Coors: (14, 2)
Input: Netherlands [NLD]      Output: Mexico [MEX]  Coors: (4, 4)
Input: Mexico [MEX]      Output: Netherlands [NLD]  Coors: (7, 0)
Input: Luxembourg [LUX]      Output: Luxembourg [LUX]  Coors: (14, 14)


# Impresión del mapa

In [10]:
# Export the trained map twice: once with rounded numeric labels (map.csv) and
# once with those labels replaced by country codes (map_names.csv).
grid = []

# SOM stores node (i, j) at index i * WIDTH + j, with i in range(HEIGHT) and
# j in range(WIDTH). Each output row therefore walks one grid column `col`
# (bounded by WIDTH) across every grid row `i` (bounded by HEIGHT). With the
# two bounds swapped, a non-square map would index the wrong nodes or run past
# the end of the node list; a square map gives the same result either way.
for col in range(WIDTH):
    row = [round(kohonen_network.nodes[i * WIDTH + col].prediction_vector[0]) for i in range(HEIGHT)]
    grid.append(row)

grid_frame = pd.DataFrame(grid)
grid_frame.to_csv('map.csv', header=False, index=False)
print(grid_frame)

# Swap each numeric label for the last five characters of the matching
# column name (e.g. "[USA]").
for i, country in enumerate(countries):
    grid_frame = grid_frame.replace(i, country[-5:])

grid_frame.to_csv('map_names.csv', header=False, index=False)

In [15]:
# For every country code, count how many map cells carry it, then save the
# codes indexed by those counts.
count = []
three_letters_countries = []

for country in countries:
    code = country[-5:]
    matching_cells = grid_frame[grid_frame == code]
    three_letters_countries.append(code)
    count.append(matching_cells.count().sum())

# The counts are used as the frame's index, so each CSV row is "count,code".
countries_frame = pd.DataFrame(three_letters_countries, index=count)
countries_frame.to_csv('countries.csv', header=False)