## Line Graphs

### Basic line graphs

In [None]:
# Plotting library plus the cookbook package used for the example datasets
library(ggplot2)
library(gcookbook)
# Notebook plot options: width, height and resolution of the rendered figures
options(
  repr.plot.width = 10,
  repr.plot.height = 7,
  repr.plot.res = 200
)

In [None]:
# A very simple dataset: BOD has a Time column and a demand column (both are plotted below)
BOD

In [None]:
# Line with point markers: demand against Time
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line() +
  geom_point()

In [None]:
# Work on a copy of BOD in which Time is a factor (categorical) instead of a number
BOD1 <- BOD
BOD1[["Time"]] <- factor(BOD[["Time"]])
# Check the resulting column types
str(BOD1)

In [None]:
# Same plot as before, but with Time as a factor on the x axis and no explicit grouping
ggplot(BOD1, aes(x = Time, y = demand)) +
  geom_line() +
  geom_point()

In [None]:
# Use aes(group=1) so ggplot() knows that all the data points belong to the same group and
# should be connected by a single line (needed here because Time is a factor)
ggplot(BOD1, aes(x=Time, y=demand, group=1)) +
    geom_line() + geom_point()

In [None]:
# Make the Y axis start at 0 with ylim(); the upper limit is the largest demand value
ggplot(BOD1, aes(x = Time, y = demand, group = 1)) +
  geom_line() +
  geom_point() +
  ylim(0, max(BOD$demand))

In [None]:
# Use the worldpop dataset (its Year and Population columns are plotted below)
worldpop

In [None]:
# Population against Year, on a linear Y axis
ggplot(worldpop, aes(x = Year, y = Population)) +
  geom_line() +
  geom_point()

In [None]:
# Same plot, with a log10 scale on the Y axis
ggplot(worldpop, aes(x = Year, y = Population)) +
  geom_line() +
  geom_point() +
  scale_y_log10()

### Creating a graph with several lines

In [None]:
# Create the environment: install the packages loaded in the next cell (plyr, magrittr),
# but only when they are not already available, so re-running the cell is cheap
for (pkg in c("plyr", "magrittr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

In [None]:
library(plyr) #so we can use ddply() to create the example dataset
library(magrittr)

In [None]:
# Inspect the structure of ToothGrowth (its supp, dose and len columns are used below)
str(ToothGrowth)

In [None]:
# Display the full ToothGrowth dataset
ToothGrowth

In [None]:
# Summarise ToothGrowth for every supp/dose combination: the new column 'length'
# holds the mean of len within each combination
tg <- ddply(
  ToothGrowth,
  c("supp", "dose"),
  summarise,
  length = mean(len)
)

In [None]:
# Display the summarised data (mean length per supp/dose combination)
tg

In [None]:
# One line per supp value, told apart by color
ggplot(tg, aes(x = dose, y = length, color = supp)) +
  geom_line()

In [None]:
# Different line types instead of different colors
ggplot(tg, aes(x = dose, y = length, linetype = supp)) +
  geom_line()

In [None]:
# "dose" only takes the values 0.5 / 1.0 / 2.0, so it can be treated as a categorical variable.
# Note the group=supp mapping: it tells ggplot how to group the data into lines.
ggplot(tg, aes(x = factor(dose), y = length, color = supp, group = supp)) +
  geom_line()

In [None]:
# An incorrect grouping: no variable separates the two supp series
ggplot(tg, aes(x = dose, y = length)) +
  geom_line()

In [None]:
# this problem arose because there are multiple points at each x location (one per supp), all treated as one group
#
# Note: If any discrete variables are mapped to aesthetics like color or linetype, they are automatically
# used as grouping variables (which is good!)
#
# Otherwise, when using other variables for grouping, group= should be used.

In [None]:
# Mapping supp to shape gives an automatic grouping based on supp
ggplot(tg, aes(x = dose, y = length, shape = supp)) +
  geom_line() +
  geom_point(size = 4)

In [None]:
# A single point shape (21) for every point, filled according to supp
# (the variable mapped to fill must be categorical)
ggplot(tg, aes(x = dose, y = length, fill = supp)) +
  geom_line() +
  geom_point(size = 5, shape = 21)
    

In [None]:
# Dodge lines and points by the same amount to avoid overlaps
# (a higher dodge value moves them further apart)
ggplot(tg, aes(x = dose, y = length, shape = supp)) +
  geom_line(position = position_dodge(0.2)) +
  geom_point(size = 4, position = position_dodge(0.2))

### Changing the appearance of lines

In [None]:
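# the line type can be set with linetype= (solid, dashed, dotted, etc.)
# the thickness (in mm) can be changed using size= (ggplot2 >= 3.4.0 prefers linewidth= for lines)
# the color can be set with color= (colour= is an equivalent spelling)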

In [None]:
# Thick, dashed, blue line
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line(
    linetype = "dashed",
    size = 2,
    color = "blue"
  )

In [None]:
# Rebuild the summarised dataset (mean len per supp/dose combination)
library(plyr)
tg <- ddply(
  ToothGrowth,
  c("supp", "dose"),
  summarise,
  length = mean(len)
)
# Both lines share one fixed color, so supp is supplied through group= only
ggplot(tg, aes(x = dose, y = length, group = supp)) +
  geom_line(color = "darkgreen", size = 1.5)

In [None]:
# Take the line colors from the Color Brewer palette "Set2"
ggplot(tg, aes(x = dose, y = length, color = supp)) +
  geom_line(size = 2) +
  scale_colour_brewer(palette = "Set2")

In [None]:
# Dashed lines colored by supp, with white-filled markers (shape 22) drawn on top
ggplot(tg, aes(x = dose, y = length, color = supp)) +
  geom_line(linetype = "dashed", size = 2) +
  geom_point(shape = 22, size = 3, fill = "white")

In [None]:
# Changing point appearance: size, shape, outline color and fill color
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line() +
  geom_point(
    size = 4,
    shape = 22,
    color = "darkred",
    fill = "pink"
  )

In [None]:
# the default shape for points is a solid circle (shape 19), the default size is 1.5 and the color is black.
# The fill color is only relevant for some shapes.

In [None]:
# IMPORTANT: points should be drawn after lines, so they stay on the top layer

### Making a graph with a shaded area

In [None]:
# sunspot.year is a dataset; it is displayed here as-is and converted to a data frame in the next cell
sunspot.year

In [None]:
# Convert sunspot.year into a plain data frame with two numeric columns:
# the time index (year) and the observed values (sunspots)
sunspotyear <- data.frame(
  year = as.numeric(time(sunspot.year)),
  sunspots = as.numeric(sunspot.year)
)
# Display the result
sunspotyear

In [None]:
# Shaded area under the sunspots series
ggplot(sunspotyear, aes(x = year, y = sunspots)) +
  geom_area()

In [None]:
# Black outline with a blue fill; alpha is the transparency of the filled region
ggplot(sunspotyear, aes(x = year, y = sunspots)) +
  geom_area(
    color = "black",
    fill = "blue",
    alpha = 0.1
  )

In [None]:
# Semi-transparent green area without an outline, with a line layer drawn on top
ggplot(sunspotyear, aes(x = year, y = sunspots)) +
  geom_area(fill = "green", alpha = 0.2) +
  geom_line()

### Making a stacked area graph

In [None]:
library(gcookbook)
library(scales)

In [None]:
# Use the uspopage dataset: show its first 15 rows
head(uspopage, n = 15)

In [None]:
# Stacked area graph: one filled band per AgeGroup, Y axis labels formatted with comma
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area() +
  scale_y_continuous(labels = comma)

In [None]:
# Reverse the legend order (rev() on the AgeGroup levels), switch the palette to a
# range of blues, add thin lines between the areas and use some transparency
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(color = "black", size = 0.2, alpha = 0.4) +
  scale_fill_brewer(
    palette = "Blues",
    breaks = rev(levels(uspopage$AgeGroup))
  ) +
  scale_y_continuous(labels = comma)

### Using the color brewer

In [None]:
# Load RColorBrewer and display all of its palettes
library(RColorBrewer)
display.brewer.all()

In [None]:
# Display only the palettes flagged as colorblind friendly
display.brewer.all(colorblindFriendly = TRUE)

In [None]:
# Preview 8 colors of the "Dark2" palette, then list those colors
display.brewer.pal(n = 8, name = "Dark2")
brewer.pal(n = 8, name = "Dark2")

### Creating a proportional stacked area graph

In [None]:
library(gcookbook)
library(plyr)

# First, create a modified dataset that includes the proportional field:
# within each Year, Percent is that row's share of the year's total Thousands, times 100
uspopage_prop <- ddply(
  uspopage,
  "Year",
  transform,
  Percent = Thousands / sum(Thousands) * 100
)

In [None]:
# Display the dataset with the added Percent column, then inspect its structure
uspopage_prop
str(uspopage_prop)

In [None]:
# Proportional stacked area graph: Percent per AgeGroup, blue palette,
# thin black lines between the areas and some transparency
ggplot(uspopage_prop, aes(x = Year, y = Percent, fill = AgeGroup)) +
  geom_area(color = "black", size = 0.2, alpha = 0.4) +
  scale_fill_brewer(palette = "Blues")

### Adding a confidence region

In [None]:
# Climate dataset (its Source, Year, Anomaly10y and Unc10y columns are used below)
climate

In [None]:
# Get a subset of the climate data: only the rows whose Source is "Berkeley",
# keeping just the three columns needed for the plot
#
# Note: Anomaly10y is a 10-year running average of the deviation from the average 1950-1980 temperature
#       Unc10y is the 95% confidence interval
clim <- subset(
  climate,
  Source == "Berkeley",
  select = c("Year", "Anomaly10y", "Unc10y")
)

In [None]:
# Show the first rows of the subset
head(clim)

In [None]:
# Create the confidence region with geom_ribbon(), which needs values mapped to
# ymin and ymax: here the anomaly minus / plus its uncertainty.
# The line is added after the ribbon so it is drawn on top of the shaded band.
ggplot(clim, aes(x = Year, y = Anomaly10y)) +
  geom_ribbon(
    aes(ymin = Anomaly10y - Unc10y, ymax = Anomaly10y + Unc10y),
    alpha = 0.2
  ) +
  geom_line()

In [None]:
# Homework: represent the above graph using 3 lines, 2 for the confidence region and 1 for the series