# Example of SOMs with the IRIS dataset 

## Load the libraries and dataset:

In [None]:
# Attach the kohonen package. library() stops with an error when the package
# is missing, whereas require() would only return FALSE and let a later cell
# fail with a less obvious message.
library(kohonen)

In [None]:
# Make the plots a little smaller (5 x 5) via the repr plot-size options.
options(
  repr.plot.height = 5,
  repr.plot.width = 5
)

In [None]:
# Load the author's own plotting helpers, in the same order as before.
# source() evaluates each file in the global workspace by default.
invisible(lapply(
  c(
    "somComponentPlanePlottingFunction.R",
    "plotUMatrix.R",
    "addClusterBoundaries.R",
    "addText.R"
  ),
  source
))

In [None]:
# Bring the built-in iris data frame into the workspace and show its structure.
data("iris")
str(object = iris)

In [None]:
# Build the SOM input.
# Show the names of the four columns that are used as SOM features.
colnames(iris)[1:4]
# Keep the labels as a one-column data frame (column "Species").
species <- iris["Species"]
# Centre and scale the four feature columns; scale() returns a numeric matrix.
somdata <- scale(iris[, 1:4])

## Training a simple SOM:

In [None]:
# Train an unsupervised SOM on the scaled measurements.
#   grid : 10 x 10 map, "hexagonal" topology ("rectangular" is the other
#          choice), not toroidal
#   rlen : number of times the complete data set is presented to the network
#   alpha: learning-rate range; the package default is c(0.05, 0.01)
data.som1 <- som(
  somdata,
  grid = somgrid(xdim = 10, ydim = 10, topo = "hexagonal", toroidal = FALSE),
  rlen = 1000,
  alpha = c(0.5, 0.01)
)

In [None]:
# Training progress of the first SOM. Author's observation: a plateau only
# becomes visible after roughly 500 presentations of the data.
plot(
  data.som1,
  type = "changes"
)

In [None]:
# Hierarchical clustering of the SOM codebook vectors, cut into three groups,
# once with each Ward variant.
# Earlier variant, kept for reference (default linkage, two clusters):
#som_cluster <- cutree(hclust(dist(data.som1$codes[[1]])), 2)
som_cluster <- cutree(
  hclust(dist(data.som1$codes[[1]]), method = "ward.D"),
  k = 3
)
som_cluster2 <- cutree(
  hclust(dist(data.som1$codes[[1]]), method = "ward.D2"),
  k = 3
)

In [None]:
# Alternative grouping of the codebook vectors: k-means with three centres.
clust <- kmeans(
  x = data.som1$codes[[1]],
  centers = 3
)

In [None]:
# Distance matrix between the codebook vectors of the map cells.
dc <- dist(data.som1$codes[[1]])
# Number of samples assigned to each cell that received at least one sample.
# NOTE(review): the original comment called hclust()'s `members` option
# crucial, but nb is never passed to hclust() below. Cells without samples do
# not appear in this table, so nb can be shorter than the number of cells and
# could not be used as `members` as-is.
nb <- table(data.som1$unit.classif)
# Hierarchical clustering of the cells with Ward's criterion (ward.D2).
cah <- hclust(dc, method = "ward.D2")
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
plot(cah, hang = -1, labels = FALSE)
# Author's reading of the dendrogram: two clusters make the most sense.
# Outline the two-cluster cut on the dendrogram.
rect.hclust(cah, k = 2)

In [None]:
# Component planes: one panel per input variable on a 2 x 2 layout.
# Ideally the sample distribution over the map is relatively uniform.
par(mar = c(1, 1, 1, 1))
cplanelay <- layout(matrix(seq_len(4), nrow = 2))
vars <- colnames(data.som1$data[[1]])
for (p in vars) {
  # NOTE(review): the original comment offered "Quantile" as the alternative
  # to itself; the other option is presumably "Equal Interval" — confirm in
  # plotCplane().
  plotCplane(
    som_obj = data.som1,
    variable = p,
    legend = FALSE,
    type = "Quantile"
  )
  # Overlay both Ward clusterings: ward.D in blue, ward.D2 in red.
  add.cluster.boundaries(
    data.som1, som_cluster,
    col = rgb(0, 0, 0.5, 0.7), lwd = 3
  )
  add.cluster.boundaries(
    data.som1, som_cluster2,
    col = rgb(0.5, 0, 0, 0.7), lwd = 3
  )
  # Disabled k-means overlay:
  #add.cluster.boundaries(data.som1, clust$cluster, col=rgb(0,0,0,0.4), lwd=2)
  add.numbers(data.som1, scale = 0.5)
}

In [None]:
## U-matrix of the first SOM with sample points and cluster outlines
par(mar = c(5, 0, 5, 3.6))
# plotUmat() type is "Quantile" or "Equal Interval" (the default, per the
# author's note).
neigh.dists <- plotUmat(data.som1, type = "Equal Interval")
add.points(data.som1, scale = 0.6)
add.cluster.boundaries(data.som1, som_cluster)

# Colour bar covering the observed range of the values returned by plotUmat().
limits <- range(neigh.dists, na.rm = TRUE)
image.plot(
  add = TRUE,
  legend.width = 1,
  legend.only = TRUE,
  col = rev(designer.colors(n = 50, col = brewer.pal(9, "Spectral"))),
  zlim = limits
)

In [None]:
# Palette function: n colours interpolated along the reversed 9-class
# "Spectral" Brewer palette (brewer.pal(11, "Spectral") was the alternative
# considered).
mypal <- function(n) {
  spectral_ramp <- colorRampPalette(brewer.pal(9, "Spectral"))
  rev(spectral_ramp(n))
}

# Neighbour-distance plot drawn by kohonen itself, coloured with mypal and
# outlined with the ward.D clustering.
# Other palettes that were tried:
#   palette.name = colorRampPalette(brewer.pal(11, "Spectral"))
#   palette.name = terrain.colors
plot(
  data.som1,
  type = "dist.neighbours",
  palette.name = mypal,
  shape = "straight"
)
kohonen::add.cluster.boundaries(data.som1, som_cluster)

## Training + test split

In [None]:
# Randomly pick 100 rows for training; the remaining rows form the test set.
# The sampled indices do not need to be sorted: negative indexing drops the
# same rows whatever their order.
# NOTE(review): no seed is set in the visible code, so the split differs
# between runs.
training_indices <- sample(nrow(somdata), 100)
data.training <- scale(somdata[training_indices, ])
# Scale the test rows with the centre and scale of the training set so that
# both sets are expressed in the same units.
data.testing <- scale(
  somdata[-training_indices, ],
  center = attr(data.training, "scaled:center"),
  scale = attr(data.training, "scaled:scale")
)
# Row numbers (within somdata) of the held-out samples, derived from the size
# of the data instead of a hard-coded 150.
original_test_indices <- seq_len(nrow(somdata))[-training_indices]

In [None]:
# One-hot encode the species labels of the training rows.
Y <- classvec2classmat(species[training_indices, ])
# Train a second SOM with xyf() on the measurements plus the class matrix:
# 8 x 8 hexagonal grid, 300 presentations, equal user weights for both layers.
data.som2 <- xyf(
  data.training, Y,
  grid = somgrid(xdim = 8, ydim = 8, topo = "hexagonal"),
  rlen = 300,
  user.weights = c(1, 1)
)
summary(data.som2)

In [None]:
# whatmap selects the data layers of the kohonen object that are used for the
# mapping on which the predictions are based; here only layer 1.
predictions <- predict(
  data.som2,
  data.testing,
  whatmap = 1
)
# Cross-tabulate the true species of the held-out rows against the
# predictions for layer 2.
table(
  species[-training_indices, ],
  predictions$predictions[[2]]
)

In [None]:
# Neighbour-distance plot of the second SOM, coloured with mypal.
plot(
  data.som2,
  type = "dist.neighbours",
  palette.name = mypal,
  shape = "straight"
)

In [None]:
# Codebook vectors of both layers, then a three-cluster cut of the layer-2
# codes (hclust with its default linkage) drawn on top as boundaries.
plot(
  data.som2,
  type = "codes",
  main = c("Codes X", "Codes Y"),
  shape = "straight"
)
data.som2.hc <- cutree(hclust(dist(data.som2$codes[[2]])), k = 3)
kohonen::add.cluster.boundaries(data.som2, data.som2.hc)

In [None]:
bg.pallet <- c("red", "blue", "yellow", "purple", "green")

# Background colour of every map cell: the colour of its predicted species.
position.predictions <- classmat2classvec(
  predict(data.som2)$unit.predictions[[2]]
)
base.color.vector <- bg.pallet[
  match(position.predictions, levels(species$Species))
]

# Opacity of each cell scales with the largest value in its layer-2 code
# vector (the maximum confidence of the prediction). adjustcolor() is applied
# one cell at a time, as in the original loop; vapply() with seq_along()
# avoids growing the vector element by element and handles an empty input.
max.conf <- apply(data.som2$codes[[2]], 1, max)
bgcols <- vapply(
  seq_along(base.color.vector),
  function(i) adjustcolor(base.color.vector[i], max.conf[i]),
  character(1)
)

In [None]:
# Mapping plot of the second SOM: every training sample is drawn in its
# winning cell and filled with the colour of its true species.
par(mar = c(0, 0, 0, 4), xpd = TRUE)
# data.som2 was trained on the rows in training_indices only, so the point
# colours must come from those rows, in that order. Using all species in
# their original order would colour the points with the wrong labels.
training_species <- species$Species[training_indices]
plot(
  data.som2,
  type = "mapping",
  pchs = 21,
  col = "black",
  bg = bg.pallet[match(training_species, levels(training_species))],
  bgcol = bgcols,
  shape = "straight"
)

# Legend: species names written in their point colours, placed in the right
# margin.
legend(
  "topright",
  legend = levels(species$Species),
  text.col = bg.pallet,
  bty = "n",
  inset = c(-0.03, 0)
)