In [None]:
library(ggplot2)
library(gcookbook)
# Notebook plot output: 10 x 7 plotting area rendered at a resolution of 200
options(
  repr.plot.width = 10,
  repr.plot.height = 7,
  repr.plot.res = 200
)

### Making a Violin Plot

In [None]:
# Let's use a very simple dataset with more than 200 observations.
# str() prints a compact summary of its columns and their types.
str(heightweight)

In [None]:
# A violin plot compares the density estimates of several groups side by side.
# `p` holds the base plot (height by sex) reused by the following cells.
p <- ggplot(heightweight, aes(x = sex, y = heightIn))
p + geom_violin()

In [None]:
# By default every violin is scaled to the same total area.
# With scale = "count" the area is proportional to the number of observations
# in each group instead (notice the f violin is slightly narrower).
p + geom_violin(scale = "count")

In [None]:
# The amount of smoothing of the violin outline can be controlled with `adjust`
# (default is 1).
# More smoothing:
p + geom_violin(adjust = 2)

In [None]:
# Less smoothing: an `adjust` value below 1
p + geom_violin(adjust = 0.5)

### Making a Density Plot of Two-Dimensional Data

In [None]:
# The stat_density2d() function makes a 2D kernel density estimate from the data

In [None]:
# Inspect the faithful dataset: column summary first, then the first rows
str(faithful)
head(faithful)

In [None]:
# Base plot (eruptions vs. waiting) reused by the following cells;
# draw the points with the default 2D density contour lines on top
p <- ggplot(faithful, aes(x = eruptions, y = waiting))
p +
  geom_point() +
  stat_density2d()

In [None]:
# stat_density2d has internal (computed) variables: level and density.
# Map the contour level to the line color. after_stat() is the current way to
# refer to a computed variable; the `..level..` notation is deprecated.
p + stat_density2d(aes(color = after_stat(level)))

In [None]:
# The default is to use contour lines but it's also possible to use tiles and map density estimate to a fill color.

In [None]:
# Map the density estimate to the fill color of a raster instead of drawing
# contour lines (contour = FALSE). after_stat() replaces the deprecated
# `..density..` notation for the computed density variable.
p + stat_density2d(
  aes(fill = after_stat(density)),
  geom = "raster",
  contour = FALSE
)

In [None]:
# Draw the points and map the density estimate to alpha (transparency).
# Tiles are used here instead of a raster; per the original note,
# geom = "raster" should look slightly better.
# after_stat() replaces the deprecated `..density..` notation.
p +
  geom_point() +
  stat_density2d(
    aes(alpha = after_stat(density)),
    geom = "tile",
    contour = FALSE
  )

In [None]:
# Change the x and y bandwidths by passing a two-element vector to `h`
# (choosing good values is more art than science).
# after_stat() replaces the deprecated `..density..` notation.
p + stat_density2d(
  aes(fill = after_stat(density)),
  geom = "raster",
  contour = FALSE,
  h = c(0.5, 5)
)

### Adding Lines

In [None]:
# Scatter plot of height against age, colored by sex; `p` is reused by the
# following cells to add reference lines
p <- ggplot(heightweight, aes(x = ageYear, y = heightIn, color = sex)) +
  geom_point()
p

In [None]:
# Add a horizontal line at y = 60 and a vertical line at x = 14
p +
  geom_hline(yintercept = 60) +
  geom_vline(xintercept = 14)

In [None]:
# Add a straight line defined by its intercept and slope
p + geom_abline(intercept = 37.4, slope = 1.75)

### Making a Circular Plot

In [None]:
# Preview the first rows of the wind dataset
head(wind)

In [None]:
# Histogram of DirCat in bins of width 15, filled by SpeedCat and drawn in
# polar coordinates; the x axis is limited to the 0-360 range
ggplot(wind, aes(x = DirCat, fill = SpeedCat)) +
  geom_histogram(binwidth = 15) +
  coord_polar() +
  scale_x_continuous(limits = c(0, 360))

In [None]:
# Same polar histogram with some polish:
# - thin black outline around each bar (`linewidth` replaces the deprecated
#   use of `size` for line widths)
# - legend order reversed
# - major breaks every 45 and minor breaks every 15 on the 0-360 axis
# - Brewer palette for the fill
ggplot(wind, aes(x = DirCat, fill = SpeedCat)) +
  geom_histogram(binwidth = 15, color = "black", linewidth = 0.25) +
  guides(fill = guide_legend(reverse = TRUE)) +
  coord_polar() +
  scale_x_continuous(
    limits = c(0, 360),
    breaks = seq(0, 360, by = 45),
    minor_breaks = seq(0, 360, by = 15)
  ) +
  scale_fill_brewer()

## Introduction to treemapify

In [None]:
# Install treemapify only when it is missing, so that re-running the notebook
# does not download and reinstall the package every time
if (!requireNamespace("treemapify", quietly = TRUE)) {
  install.packages("treemapify")
}

In [None]:
library(treemapify)

In [None]:
# Print the G20 dataset to see the columns available for the treemaps
G20

In [None]:
# Basic treemap: tile area mapped to gdp_mil_usd, fill color mapped to hdi
ggplot(G20, aes(area = gdp_mil_usd, fill = hdi)) + geom_treemap()

In [None]:
# Identify the tiles by mapping the country name to the label aesthetic.
# Check that ggfittext is already installed before running this cell.
ggplot(G20, aes(area = gdp_mil_usd, fill = hdi, label = country)) +
  geom_treemap() +
  geom_treemap_text(
    fontface = "italic",
    colour = "white",
    place = "centre",
    grow = TRUE
  )

In [None]:
# Subgrouping in the treemap: tiles are grouped by region, each subgroup gets
# a border and a large, semi-transparent label behind the country names
ggplot(
  G20,
  aes(area = gdp_mil_usd, fill = hdi, label = country, subgroup = region)
) +
  geom_treemap() +
  geom_treemap_subgroup_border() +
  geom_treemap_subgroup_text(
    place = "centre",
    grow = TRUE,
    alpha = 0.4,
    colour = "black",
    fontface = "italic",
    min.size = 0
  ) +
  # TRUE instead of T: T is an ordinary variable that can be reassigned
  geom_treemap_text(colour = "white", place = "topleft", reflow = TRUE)

In [None]:
# Finally, three nested grouping levels: hemisphere (north/south), then
# region, then economic classification. Every tile has the same area
# (area = 1). Each level gets its own border and label color:
# red = hemisphere, white = region, blue border / yellow text = classification.
ggplot(
  G20,
  aes(
    area = 1,
    label = country,
    subgroup = hemisphere,
    subgroup2 = region,
    subgroup3 = econ_classification
  )
) +
  geom_treemap() +
  # Borders are added from the innermost level to the outermost one
  geom_treemap_subgroup3_border(colour = "blue", size = 1) +
  geom_treemap_subgroup2_border(colour = "white", size = 3) +
  geom_treemap_subgroup_border(colour = "red", size = 5) +
  # TRUE instead of T: T is an ordinary variable that can be reassigned
  geom_treemap_subgroup_text(
    place = "middle",
    colour = "red",
    alpha = 0.5,
    grow = TRUE
  ) +
  geom_treemap_subgroup2_text(
    colour = "white",
    alpha = 0.5,
    fontface = "italic"
  ) +
  geom_treemap_subgroup3_text(place = "top", colour = "yellow", alpha = 0.5) +
  geom_treemap_text(colour = "white", place = "middle", reflow = TRUE)

### Just for the sake of curiosity, we can also use another package to create treemaps

In [None]:
# Using the treemap package: install it only when it is missing, so that
# re-running the notebook does not reinstall it every time, then load it
if (!requireNamespace("treemap", quietly = TRUE)) {
  install.packages("treemap")
}
library(treemap)

In [None]:
# Create data: one row per group together with its value
group <- c("group-1", "group-2", "group-3")
value <- c(13, 5, 22)
data <- data.frame(group = group, value = value)

In [None]:
# Treemap with one rectangle per group, sized by the value column
treemap(data, index = "group", vSize = "value", type = "index")

In [None]:
# Create data: three groups, each split into numbered subgroups
group <- c(rep("group-1", 4), rep("group-2", 2), rep("group-3", 3))
subgroup <- paste("subgroup", c(1, 2, 3, 4, 1, 2, 1, 2, 3), sep = "-")
value <- c(13, 5, 22, 12, 11, 7, 3, 1, 23)
data <- data.frame(group, subgroup, value)

# Custom labels. Options given as vectors take one entry per level of
# aggregation: first for group, then for subgroup (then sub-subgroups, ...).
treemap(
  data,
  index = c("group", "subgroup"),
  vSize = "value",
  type = "index",
  # Size of labels, per level of aggregation
  fontsize.labels = c(15, 12),
  # Color of labels
  fontcolor.labels = c("white", "orange"),
  # Font of labels: 1, 2, 3, 4 for normal, bold, italic, bold-italic
  fontface.labels = c(2, 1),
  # Background color of labels
  bg.labels = "transparent",
  # Where to place labels in the rectangle
  align.labels = list(
    c("center", "center"),
    c("right", "bottom")
  ),
  # Number between 0 and 1 that determines the tolerance of the overlap
  # between labels. 0 means that labels of lower levels are not printed if
  # higher level labels overlap, 1 means that labels are always printed.
  # In-between values, for instance the default value .5, mean that lower
  # level labels are printed if other labels do not overlap with more than
  # .5 times their area size.
  overlap.labels = 0.5,
  # If TRUE, labels are bigger when the rectangle is bigger.
  # (FALSE is spelled out, and there is no trailing comma after the last
  # argument, so no empty argument is passed to treemap().)
  inflate.labels = FALSE
)