# KMeans - gaussian2d

<table>
    <tr>
        <td><img src="images/5.png" /></td>
    </tr>
</table>

In [None]:
# Load libraries.
library("cluster")
library("stats")
library("fpc")
library("ggplot2")
library("gridExtra")
library("ggpubr")

# Set the random number generator seed so that randomised steps run after
# this point (e.g. the random initial centres picked by kmeans() below) are
# repeatable from one run to the next.
set.seed(321)

In [None]:
# Restore the objects stored in the saved workspace file into the session.
# NOTE(review): the cells below use a data frame called `df`; presumably it
# comes from this file -- confirm against the data set.
load("data/gaussian2d")

In [None]:
# Scatter plot of x.1 against x.2, one colour per class. Points are drawn
# first and a "norm"-type ellipse per class is layered on top; the legend is
# anchored inside the panel at its top-left corner.
scatterPlot <- ggplot(data = df,
                      mapping = aes(x = x.1, y = x.2, colour = classes)) +
    geom_point(alpha = 0.6, size = 2.5) +
    stat_ellipse(type = "norm") +
    scale_colour_manual(
        values = c("#999999", "#E69F00", "#9ac0cd", "#4682b4", "#6e8b3d",
                   "#ee7600", "#8b1a1a", "#cd1076", "#8b8970", "#473c8b")
    ) +
    theme(legend.justification = c(0, 1),
          legend.position = c(0, 1))

In [None]:
# Marginal density of x.1, one semi-transparent curve per class. Intended for
# the top panel of the combined figure, so the legend is suppressed.
xdensity <- ggplot(data = df, mapping = aes(x = x.1, fill = classes)) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(
        values = c("#999999", "#E69F00", "#9ac0cd", "#4682b4", "#6e8b3d",
                   "#ee7600", "#8b1a1a", "#cd1076", "#8b8970", "#473c8b")
    ) +
    theme(legend.position = "none")

In [None]:
# Marginal density of x.2, one semi-transparent curve per class. Intended for
# the right-hand panel of the combined figure: the legend is suppressed and
# ggpubr's rotate() turns the plot on its side.
ydensity <- ggplot(data = df, mapping = aes(x = x.2, fill = classes)) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(
        values = c("#999999", "#E69F00", "#9ac0cd", "#4682b4", "#6e8b3d",
                   "#ee7600", "#8b1a1a", "#cd1076", "#8b8970", "#473c8b")
    ) +
    theme(legend.position = "none") +
    rotate()

In [None]:
# Empty place-holder plot: every visible theme element (backgrounds, grid
# lines, border, axis titles, labels and ticks) is blanked out so that the
# cell it occupies in the combined figure stays visually empty.
blankPlot <- ggplot() +
    geom_blank() +
    theme(
        # Backgrounds and frame.
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.border = element_blank(),
        # Grid lines.
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        # Axis decorations.
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank()
    )

In [None]:
# Lay the four plots out on a 2 x 2 grid with gridExtra's grid.arrange().
# Cells are filled row by row: the x.1 density sits above the scatter plot,
# the x.2 density to its right, and the blank plot fills the top-right corner.
# The first column and the second row are given most of the space so the
# scatter plot dominates the figure.
grid.arrange(xdensity, blankPlot,
             scatterPlot, ydensity,
             nrow = 2, ncol = 2,
             heights = c(1.4, 4),
             widths = c(4, 1.4))

In [None]:
# K-Means cluster analysis for 2 to 5 clusters.
#
# For each cluster count the loop fits kmeans(), prints the per-cluster means
# and draws the same scatter-plus-marginal-densities figure as above, this
# time coloured by the assigned cluster instead of by `classes`.

# Cluster on the feature columns only. `classes` is mapped onto discrete
# manual colour/fill scales in the plots above, so it is a label rather than
# a numeric feature; handing it to kmeans() would either fail on the
# non-numeric column or let the reference labels leak into the clustering.
# NOTE(review): assumes every column other than `classes` is numeric --
# confirm against the data set.
features <- df[, setdiff(names(df), "classes"), drop = FALSE]

# One colour per cluster, shared by the colour and the fill scales.
cluster.palette <- c('#999999', '#E69F00', '#9ac0cd', '#4682b4', '#6e8b3d',
                     '#ee7600', '#8b1a1a', '#cd1076', '#8b8970', '#473c8b')

# Dummy plot that will act as a place-holder. It does not depend on the
# number of clusters, so it is built once instead of on every iteration.
blankPlot <- ggplot() +
    geom_blank() +
    theme(plot.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks = element_blank())

for (no.clusters in 2:5) {
    km <- kmeans(features, no.clusters)

    # Cluster means. Values are not auto-printed inside a for loop, so the
    # table has to be printed explicitly to show up in the output.
    print(aggregate(features, by = list(km.cluster = km$cluster), FUN = mean))

    # Append the cluster assignment as a factor (under an explicit column
    # name) so that it maps onto the discrete colour and fill scales.
    data <- data.frame(df, km.cluster = as.factor(km$cluster))

    # Scatter plot of x and y variables - color by cluster.
    scatterPlot <- ggplot(data, aes(x.1, x.2, color = km.cluster)) +
        geom_point(size = 2.5, alpha = 0.6) +
        scale_color_manual(values = cluster.palette) +
        theme(legend.position = c(0, 1), legend.justification = c(0, 1)) +
        stat_ellipse(type = "norm")

    # Marginal density plot of x (top panel)
    xdensity <- ggplot(data, aes(x.1, fill = km.cluster)) +
        geom_density(alpha = .5) +
        scale_fill_manual(values = cluster.palette) +
        theme(legend.position = "none")

    # Marginal density plot of y (right panel)
    ydensity <- ggplot(data, aes(x.2, fill = km.cluster)) +
        geom_density(alpha = .5) +
        scale_fill_manual(values = cluster.palette) +
        theme(legend.position = "none") +
        rotate()

    # 2 x 2 layout: densities on the top and right margins of the scatter
    # plot, blank place-holder in the top-right corner.
    grid.arrange(xdensity,
                 blankPlot,
                 scatterPlot,
                 ydensity,
                 ncol = 2,
                 nrow = 2,
                 widths = c(4, 1.4),
                 heights = c(1.4, 4))
}