
# Comparing Twitter behavior of German politicians before and after the European Election 2019 using Self-Organizing Maps
 
**2. Train the Self-Organizing Map**

Student Project on Self-Organizing Maps 

---


Authors: Clara Hoffmann & Oliver Becker

In [0]:
# Attach Google Drive to the Colab runtime so the project files are reachable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [0]:
# specify path
# Folder inside the mounted Google Drive that contains the project data.
# Keep the trailing slash: file names are appended to it with plain `+`.
# Clara
path = "/content/drive/My Drive/"
# Oliver (only valid if the drive is mounted at /content/gdrive instead of
# /content/drive)
#path = "/content/gdrive/My Drive/"


In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
!pip install minisom
from minisom import MiniSom # computes soms
import math  

In [0]:
# Read the word-vector table; the first CSV column is discarded.
csv_file = path + 'data_wordvectors_pers.csv'
data = pd.read_csv(csv_file, sep=',').iloc[:, 1:]

# Names of the feature columns: "x1", "x2", ..., one per column of `data`
# except three (presumably the non-feature metadata columns -- verify
# against the CSV).
n_features = len(data.columns) - 3
indexvec = ["x" + str(k) for k in range(1, n_features + 1)]

In [0]:
# Feature matrices used to train one SOM per period (rows keep the order
# they have in `data`).
# pre-election rows
is_pre = data['time'] == 'pre'
vectors_pre = np.array(data.loc[is_pre, indexvec])
# post-election rows
is_post = data['time'] == 'post'
vectors_post = np.array(data.loc[is_post, indexvec])

In [0]:
# colors for our plots
# party -> color (used for the legend)
category_color = {'CDU' : 'black',
                  'SPD': 'red',
                  'Grüne' : 'limegreen',
                  'AfD' : 'darkgoldenrod',
                  'Linke': 'darkred',
                  'FDP' : 'gold',
                  'CSU' : 'blue'}

# politician -> party; the keys must match the names used in the data
politician_party = {'Annegreth Kramp-Karrenbauer': 'CDU',
                    'Jens Spahn': 'CDU',
                    'Markus Söder': 'CSU',
                    'Katharina Barley': 'SPD',
                    'Andrea Nahles': 'SPD',
                    'Cem Özdemir': 'Grüne',
                    'Annalena Baerbock': 'Grüne',
                    'Alice Weidel': 'AfD',
                    'Jörg Meuthen': 'AfD',
                    'Katja Kipping': 'Linke',
                    'Gregor Gysi': 'Linke',
                    'Christian Lindner': 'FDP'}

# politician -> color (used for the plotted points and their text labels).
# Derived from the party colors so that the legend and the points cannot
# drift apart; previously the legend showed 'green' for Grüne while the
# points were drawn in 'limegreen'.
category_color2 = {name: category_color[party]
                   for name, party in politician_party.items()}

**Train our SOM**

In [0]:
def trainsom(vectors):
  # choose map dimension as number of samples so that
  # each neuron gets mapped to a sample
  # in our case: one tweet
  map_dim = 30  #round(5*math.sqrt(len(vectors)))

  print("Training...")
  som = MiniSom(map_dim, map_dim, 200 ,sigma=3.0, random_seed=4568)
  #som.random_weights_init(vectors) # option to initialize weights randomly
  som.pca_weights_init(vectors)
  print("\n...ready!")
  
  # train SOM
  # due to our large # of iterations and dimensions this might take some time...
  # the option train_batch chooses oberservations sequentially instead of randomly
  #som.train_batch(vectors, len(vectors)*500) 
  som.train_random(vectors, len(vectors)*500) # picks vectors randomly
  user_map = som.labels_map(vectors, data['name'])
  model = som
  return(user_map, som)

In [0]:
# train SOMs
som_pre = trainsom(vectors_pre)
som_post = trainsom(vectors_post)

In [0]:
# Split the (user map, trained SOM) pair returned by trainsom.
som_pre_usermap, som_pre_model = som_pre
# Inspect the user map: if two politicians share a neuron,
# the 3d plot doesn't work.
som_pre_usermap

In [0]:
# Same split for the post-election result; the last line displays the map.
som_post_usermap, som_post_model = som_post
som_post_usermap

In [0]:
# set map dimension
# Side length of the square SOM grid (30 x 30 neurons); it must equal the
# map size the SOMs above were trained with.
map_dim = 30

**Plot in 3D as a cartographic map**

In [0]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.pyplot as plt
import matplotlib
from pylab import *
import numpy as np
import seaborn as sns

# Render figures directly below the cell that creates them.
%matplotlib inline

# Draw the axes frame in gray for all subsequent figures.
matplotlib.rc('axes',edgecolor='gray')

def plot3d(user_map, som, modelname):
    """Plot the SOM's U-matrix as a 3D surface with the mapped politicians.

    The surface height at neuron (x, y) is ``som.distance_map()[x, y]``.
    Every name in ``user_map`` is drawn as a colored dot on the surface with
    a text label next to it. Colors come from the global ``category_color2``
    (points and labels) and ``category_color`` (legend).

    Parameters
    ----------
    user_map : mapping
        ``{(x, y): labels}`` as returned by ``som.labels_map``; iterating
        over ``labels`` must yield the names mapped to neuron ``(x, y)``.
    som : MiniSom
        Trained map; only ``som.distance_map()`` is used.
    modelname : str
        File stem; the figure is saved as ``<modelname>.pdf`` in the current
        working directory.

    Raises
    ------
    KeyError
        If a name in ``user_map`` has no entry in ``category_color2``.
    """
    # Compute the U-matrix once and take the map size from it, so the plot
    # does not depend on a separately maintained global.
    umatrix = np.asarray(som.distance_map())
    n_x, n_y = umatrix.shape

    fig = plt.figure(figsize=(n_x, n_y))
    ax = fig.add_subplot(111, projection='3d')

    # meshgrid arrays are indexed [y, x], hence the transposed U-matrix.
    X, Y = np.meshgrid(np.arange(n_x), np.arange(n_y))
    z = umatrix.T

    # Create the surface
    surf = ax.plot_surface(X, Y, z,
                           linewidth=1, cmap='binary', edgecolor='none',
                           shade=False, antialiased=True, alpha=0.5,
                           zorder=1)
    ax.contour(X, Y, z, 15, linewidths=0.3, colors='grey', zorder=1)

    # Axis limits
    ax.set_xlim3d(0, n_x)
    ax.set_ylim3d(0, n_y)

    # One entry per (neuron, name). Names are read directly from the label
    # container instead of being parsed out of its string representation,
    # so neurons shared by several politicians are handled as well.
    points = []  # (x, y, height, name, color, rank of the name in its neuron)
    for (x_idx, y_idx), labels in user_map.items():
        for rank, name in enumerate(labels):
            points.append((x_idx, y_idx, umatrix[x_idx, y_idx], name,
                           category_color2[name], rank))

    # plot points
    if points:
        xs, ys, heights, _, colors, _ = zip(*points)
        ax.scatter(xs, ys, heights, marker='.', s=120, c=list(colors),
                   zorder=50)

    # give names to points; names sharing a neuron are shifted along y so
    # their labels do not overprint each other
    for x_idx, y_idx, height, name, clr, rank in points:
        ax.text(x_idx + 1.5, y_idx + 1.5 + rank, height, str(name),
                size=30, zorder=20, color=clr, ha='center')

    # Set view angle
    ax.view_init(elev=80, azim=200)

    # make background white
    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
        axis.pane.fill = False
        axis.pane.set_edgecolor('w')

    # Hide grid lines and ticks, keep the axis labels
    ax.grid(False)
    ax.set_ylabel('y coordinate')
    ax.set_xlabel('x coordinate')
    ax.set_zlabel('U-matrix value')
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_zticks([])

    # define legend: one colored patch per party
    legend_elements = [Patch(facecolor=clr, label=party)
                       for party, clr in category_color.items()]
    leg = ax.legend(handles=legend_elements, loc='center right',
                    title="Partys", prop={'size': 27})
    leg.get_frame().set_linewidth(0.0)
    plt.setp(leg.get_title(), size=27)

    # The colorbar gets its own axes; layout options such as shrink/aspect
    # have no effect when `cax` is given and are therefore omitted.
    cbaxes = fig.add_axes([0.81, 0.23, 0.05, 0.14])
    cbar = fig.colorbar(surf, cax=cbaxes)
    cbar.set_label('U-matrix value', rotation=0, labelpad=-10, y=1.2,
                   size=27)

    # Save before showing: some backends release the figure on show().
    fig.savefig(str(modelname) + '.pdf', dpi=300, bbox_extra_artists=(leg,),
                bbox_inches='tight', pad_inches=0)
    plt.show()


In [0]:
# 3d U-matrix landscape of the pre-election map, saved as premodel3d.pdf
plot3d(user_map=som_pre_usermap, som=som_pre_model, modelname='premodel3d')

In [0]:
# 3d U-matrix landscape of the post-election map, saved as postmodel3d.pdf
plot3d(user_map=som_post_usermap, som=som_post_model, modelname='postmodel3d')