`NOTE: This document includes plots, data, and code snippets, and will probably be a bit messy.`

# Libraries

In [581]:
source("tools.R")

# Figure of trend damping (quintessential example)

In [514]:
# Synthetic example data: a noisy upward trend and its mirror image.
n_points <- 30

x <- seq_len(n_points)

# jitter() adds random noise, so y differs between runs unless a seed is set.
y <- jitter(seq_len(n_points), factor = 50)

data_pos <- data.frame(x = x, y = y)
data_neg <- data.frame(x = x, y = -y)

In [515]:
# Identity line y = x.
linear <- function(x) x

In [516]:
# Mirrored line y = -x.
linear_neg <- function(x) -x

In [517]:
# Logistic curve parameters
L  <- 45    # numerator of the logistic term (its upper bound before the shift)
x0 <- 10    # midpoint value
k  <- 0.10  # steepness of the curve

# Logistic curve in x, shifted down by 12.
func <- function(x) {
  L / (1 + exp(-k * (x - x0))) - 12
}

In [518]:
# Positive-trend panel: the jittered points with the straight line (solid red)
# and the logistic curve (dashed blue) drawn on top.
p1 <- ggplot(data_pos, aes(x = x, y = y)) +
  geom_point() +
  stat_function(
    fun = linear, colour = "red", linetype = "solid", size = 1.1
  ) +
  stat_function(
    fun = func, colour = "blue", linetype = "dashed", size = 1.1
  ) +
  # Axes
  coord_cartesian(ylim = c(1, 40)) +
  scale_x_continuous(limits = c(1, 40)) +
  # scale_y_continuous(limits = c(0, 40)) +
  # Theme
  theme_bw() +
  ggthemes::theme_few() +
  theme(
    text = element_text(size = 12, family = "serif"),
    plot.title = element_text(hjust = 0.5)
  ) +
  ggthemes::scale_color_solarized()

In [519]:
# Parameters of the logistic curve used for the negative trend
L_  <- 35
x0_ <- 10   # midpoint value
k_  <- 0.1

# Logistic curve shifted down by 9, then negated.
func_ <- function(x) {
  shifted <- L_ / (1 + exp(-k_ * (x - x0_))) - 9
  -shifted
}

In [520]:
# Negative-trend panel: the mirrored points with the straight line (solid red)
# and the negated logistic curve (dashed blue) drawn on top.
p2 <- ggplot(data_neg, aes(x = x, y = y)) +
  geom_point() +
  stat_function(
    fun = linear_neg, colour = "red", linetype = "solid", size = 1.1
  ) +
  stat_function(
    fun = func_, colour = "blue", linetype = "dashed", size = 1.1
  ) +
  # Axes
  coord_cartesian(ylim = c(-1, -40)) +
  scale_x_continuous(limits = c(1, 40)) +
  # scale_y_continuous(limits = c(0, 40)) +
  # Theme
  theme_bw() +
  ggthemes::theme_few() +
  theme(
    text = element_text(size = 12, family = "serif"),
    plot.title = element_text(hjust = 0.5)
  ) +
  ggthemes::scale_color_solarized()

In [522]:
# Open an SVG device for the combined trend-damping figure.
svg("Images/trend_damping_explanation.svg", width=8, height=3.5)
# NOTE(review): the only drawing call is commented out, so nothing is plotted
# between svg() and dev.off(). Confirm whether multiplot() should be restored
# (it is not defined in this file; presumably it comes from tools.R).
#multiplot(p1, p2, cols=2)
dev.off()

# Real-world data

We need real-world data for six scenarios. The current status of each is:

DONE:
- `Temperature: wunderground`
- `Rain: wunderground`
- `Gym members: https://trends.google.com/trends/explore?date=today%205-y&geo=GB&q=gym`
- `Sales: https://www.ons.gov.uk/businessindustryandtrade/retailindustry/timeseries/j43s/drsi`
- `Salary: http://www.hamiltonproject.org/charts/career_earnings_by_college_major/  <- Median annual earnings over career`
- `Facebook friends (US and Canada): https://www.statista.com/statistics/247614/number-of-monthly-active-facebook-users-worldwide/`

Sales: 
- Title	RSI:Value Not seasonally Adjusted:All Retailers ex fuel:All Business Index
- CDID	J43S
- Source dataset ID	DRSI
- PreUnit	
- Unit	
- Release date	20-09-17
- Next release	19-Oct-17


## Temperature

In [615]:
# Load the weather records and coerce the temp_high column to numeric.
weather <- read_csv(file = "data/real-world/weather.csv")
weather[["temp_high"]] <- as.numeric(weather[["temp_high"]])

Parsed with column specification:
cols(
  .default = col_integer(),
  `Wind low` = col_character(),
  precipitation_sum_mm = col_double(),
  Events = col_character()
)
See spec(...) for full column specifications.


In [652]:
# Build a "YYYY MM" label per row; single-character months get a leading zero.
weather$year_month <- with(
  weather,
  ifelse(
    nchar(month) == 1,
    paste0(year, " 0", month),
    paste0(year, " ", month)
  )
)

In [717]:
# Mean of temp_high per year_month, drawn as a single line.
p <- weather %>%
  group_by(year_month) %>%
  summarize(temperature_mean = mean(temp_high)) %>%
  # group = 1 joins all months into one line (year_month is a character label).
  ggplot(aes(x = year_month, y = temperature_mean, group = 1)) +
  geom_line() +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Write the figure to disk. print() is explicit because auto-printing only
# happens for top-level expressions in an interactive session or notebook; a
# bare `p` draws nothing when this code is run through source() or from inside
# a function, which would leave the SVG without a plot.
svg("Images/debug_images/weather.svg", width = 8, height = 4)
print(p)
dev.off()

In [665]:
#weather %>%
#ggplot(aes(x=id, y=temp_high, group=1)) +
#    geom_line()

## Rain

Non-moving window: probability of event

In [655]:
# TRUE for every row whose Events text contains "Rain" (case-sensitive).
weather[["rain"]] <- grepl("Rain", weather[["Events"]], fixed = TRUE)

In [656]:
# Bucket ids into consecutive groups of seven:
# ids 1-7 -> week 1, ids 8-14 -> week 2, and so on.
weather$week <- with(weather, floor((id - 1) / 7) + 1)

In [729]:
# Percentage of rows per year_month that are flagged as rain.
p <- weather %>%
  group_by(year_month) %>%
  summarize(rain_probability = sum(rain) / length(rain) * 100) %>%
  # group = 1 joins all months into one line (year_month is a character label).
  ggplot(aes(x = year_month, y = rain_probability, group = 1)) +
  geom_line() +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Write the figure to disk. print() is explicit because auto-printing only
# happens for top-level expressions in an interactive session or notebook; a
# bare `p` draws nothing when this code is run through source() or from inside
# a function, which would leave the SVG without a plot.
svg("Images/debug_images/rain_probability.svg", width = 8, height = 4)
print(p)
dev.off()

Non-moving window: precipitation sum

In [716]:
# Mean of precipitation_sum_mm per year_month, drawn as a single line.
p <- weather %>%
  group_by(year_month) %>%
  summarize(precipitation_mean = mean(precipitation_sum_mm)) %>%
  # group = 1 joins all months into one line (year_month is a character label).
  ggplot(aes(x = year_month, y = precipitation_mean, group = 1)) +
  geom_line() +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Write the figure to disk. print() is explicit because auto-printing only
# happens for top-level expressions in an interactive session or notebook; a
# bare `p` draws nothing when this code is run through source() or from inside
# a function, which would leave the SVG without a plot.
svg("Images/debug_images/precipitation.svg", width = 8, height = 4)
print(p)
dev.off()

## Salary

In [670]:
# Load the salary data.
salary <- read_csv(file = "data/real-world/salary.csv")

Parsed with column specification:
cols(
  year = col_integer(),
  age_us = col_integer(),
  age_uk = col_integer(),
  usd_per_year = col_integer()
)


In [671]:
# usd_per_year against age_uk, restricted to ages 25 through 28 (inclusive).
p <- salary %>%
  filter(age_uk >= 25, age_uk <= 28) %>%
  ggplot(aes(x = age_uk, y = usd_per_year, group = 1)) +
  geom_line()

# Write the figure to disk. print() is explicit because auto-printing only
# happens for top-level expressions in an interactive session or notebook; a
# bare `p` draws nothing when this code is run through source() or from inside
# a function, which would leave the SVG without a plot.
svg("Images/debug_images/salary.svg", width = 8, height = 4)
print(p)
dev.off()

## Gym members

In [674]:
# Load the gym data.
gym <- read_csv(file = "data/real-world/gym.csv")

Parsed with column specification:
cols(
  id = col_integer(),
  week = col_character(),
  gym_uk = col_integer()
)


In [707]:
# Split the week label into parts by character position
# (assumes a two-digit day / month / year layout with one-character
# separators -- TODO confirm against gym.csv).
gym[["day"]]   <- substring(gym[["week"]], 1, 2)
gym[["month"]] <- substring(gym[["week"]], 4, 5)
gym[["year"]]  <- paste0("20", substring(gym[["week"]], 7, 8))

# "YYYY MM" label used for monthly grouping.
gym[["year_month"]] <- paste(gym[["year"]], gym[["month"]], sep = " ")

In [718]:
# Mean of gym_uk per year_month for the years 2013 through 2016.
p <- gym %>%
  # year is stored as a character string; convert it so the range check is a
  # numeric comparison rather than an implicit, locale-dependent string one.
  filter(as.integer(year) >= 2013, as.integer(year) <= 2016) %>%
  group_by(year_month) %>%
  summarize(gym_uk_mean = mean(gym_uk)) %>%
  # group = 1 joins all months into one line (year_month is a character label).
  ggplot(aes(x = year_month, y = gym_uk_mean, group = 1)) +
  geom_line() +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Write the figure to disk. print() is explicit because auto-printing only
# happens for top-level expressions in an interactive session or notebook; a
# bare `p` draws nothing when this code is run through source() or from inside
# a function, which would leave the SVG without a plot.
svg("Images/debug_images/gym.svg", width = 8, height = 4)
print(p)
dev.off()

## Facebook friends

In [736]:
# Load the Facebook user counts and add a "<year> <quarter>" label per row.
facebook <- read_csv(file = "data/real-world/facebook.csv")

facebook[["year_quarter"]] <- paste(
  facebook[["year"]], facebook[["quarter"]],
  sep = " "
)

Parsed with column specification:
cols(
  id = col_integer(),
  year = col_integer(),
  quarter = col_character(),
  users_millions = col_integer()
)


In [737]:
# users_millions per year_quarter, drawn as a single line.
p <- facebook %>%
  # group = 1 joins all quarters into one line (year_quarter is a character
  # label).
  ggplot(aes(x = year_quarter, y = users_millions, group = 1)) +
  geom_line() +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Write the figure to disk. print() is explicit because auto-printing only
# happens for top-level expressions in an interactive session or notebook; a
# bare `p` draws nothing when this code is run through source() or from inside
# a function, which would leave the SVG without a plot.
svg("Images/debug_images/facebook.svg", width = 8, height = 4)
print(p)
dev.off()