In [18]:
library(tidyverse)

In [3]:
### First-class functions 
# - behave like other data structure
# - eg. assign them to variables, store them in lists,
#   pass as arguments to other functions, create them inside functions, 
#   and return them as  result of a function.


### Pure functions
# Need to satisfy two properties:
# 1. output only depends on the inputs
#    eg. not Sys.time() => return different values
# 2. no side-effects
#    eg. changing  value of global variable, write to disk
#        display on screen => eg. not print()
# 
# Advantage:      easier to reason about
# Disadvantages:  eg. can’t generate random numbers or read files

### R and functional programming
# ==> R is not a strict functional programming language,
#     because it does not require pure functions

# Writing pure function not necessary but:
# 1. partitions the code better
# 2. easier to understand
# ==> if possible to write pure function - should do it



In [4]:
### Functional style
# 1. decomposing a big problem into smaller pieces
# 2. solve each piece with a function or combination of functions

# - decompose components of the problem into 
#   isolated functions that operate independently
# - each function  by itself simple & easy to understand
# ==> complexity is handled by composing functions in different ways


# Functionals

In [5]:
#### Functionals
# Are functions that
# - take a function as input
# - return a vector as output
# => often used as replacement for loops

#### Allow to take
# - function that solves the problem for a single input 
# - generalise it to handle any number of inputs
# => important technique in data analysis



#### Problem of loops
# - very flexible 
# - conveys fact of iteration
#   NOT what is to be done with result!!!
# - same reasoning as:
#   while is better than repeat
#   for   is better than while
# ==> functional better than for loop
#
# !!! Reason: !!!
# - each functional is tailored for specific task
# - see functional => immediately known why it is being used


## Examples
# lapply(), apply(), and tapply()
# purrr’s map()
# mathematical functionals: integrate(), optim()


In [6]:
## Example

# randomise <- function(f) f(runif(1000))

#' Apply a function to a vector of uniform random numbers
#'
#' @param func A function (or the name of one, as accepted by `match.fun()`)
#'   that takes a numeric vector as its first argument.
#' @param n Number of values drawn with `runif()`. Defaults to 1000, the
#'   value that used to be hard-coded.
#' @return Whatever `func` returns for the random sample.
randomise <- function(func, n = 1000) {
  # Resolve up front so a non-function argument fails with a clear error.
  func <- match.fun(func)
  func(runif(n = n))
}

# Each call draws a new random sample, so the two mean() results will
# generally differ from one another
randomise(mean)
randomise(mean)
# Same idea with a different summary function
randomise(sum)


## map(): fundamental functional

In [7]:
# map()
# - takes:
#  1. vector 
#  2. function
# - calls the function once for each element of the vector
# - returns results in a list

# "map": 
# “Operation that associates each element of a given set 
#  with one or more elements of a second set”

## map() is the purrr counterpart of base lapply()
# difference = lapply() does not support the purrr helpers (shortcuts)

#### purrr::map()
#  written in C, preserves names, and supports several shortcuts



# Multiply the input by three (works element-wise on vectors).
triple <- function(x) {
  x * 3
}
# Calls triple() once for each element of 1:3 and returns the results in a list
map(1:3, triple)

In [8]:
## Basic Idea: map() implementation

#' Minimal re-implementation of map(), to show the basic idea
#'
#' @param x A vector or list to iterate over.
#' @param f A function called once for each element of `x`.
#' @param ... Further arguments forwarded unchanged to every call of `f`.
#' @return An unnamed list with the same length as `x`.
my_map <- function(x, f, ...) {
  out <- vector("list", length(x))
  for (i in seq_along(x)) {
    # Assign with `[` and a wrapping list(): `out[[i]] <- NULL` would delete
    # the element and shorten `out` whenever f() returns NULL.
    out[i] <- list(f(x[[i]], ...))
  }
  out
}

### Atomic Vectors of specified type

In [9]:
#### Base R
# Two apply functions to return atomic vectors: 
# 1. sapply() 
#   => avoid, because it tries to simplify the result
#      (can return a list, a vector, or a matrix)
#   ==> difficult to program with
# 2. vapply()
#   => safer - allows  to provide a template
#      FUN.VALUE - describes output shape
#      BUT more verbose
# 
### Equivalent:
#
# map_dbl(x, mean, na.rm = TRUE) 
# == 
# vapply(x, mean, na.rm = TRUE, FUN.VALUE = double(1))

In [10]:
### map() returns a list
# One list element per selected column (mpg, cyl, disp), each holding its mean
print(map(mtcars[,1:3], mean))

# ...vs...

$mpg
[1] 20.09062

$cyl
[1] 6.1875

$disp
[1] 230.7219



In [11]:
# map_dbl()
# => returns a named double vector instead of a list
print(map_dbl(mtcars[,1:3], mean))

      mpg       cyl      disp 
 20.09062   6.18750 230.72188 


In [12]:
### Vectors of specified type

### map_chr()
# => returns a character vector (here: the storage type of each column)
print(map_chr(mtcars[,1:3], typeof))

### map_lgl()
# => returns a logical vector (here: whether each column is a double)
print(map_lgl(mtcars[,1:3], is.double))

### map_int()
# => returns integer vector
# Count the distinct values in x.
n_unique <- function(x) {
  distinct_values <- unique(x)
  length(distinct_values)
}
# Number of distinct values per column, returned as a named integer vector
print(map_int(mtcars[,1:3], n_unique))


     mpg      cyl     disp 
"double" "double" "double" 
 mpg  cyl disp 
TRUE TRUE TRUE 
 mpg  cyl disp 
  25    3   27 


### Anonymous functions and shortcuts

In [13]:
### Using an inline (anonymous) function
# Counts the distinct values in every column of mtcars
print(map_dbl(mtcars, function(x) length(unique(x))))

# => still very verbose -> shortcut

 mpg  cyl disp   hp drat   wt qsec   vs   am gear carb 
  25    3   27   22   22   29   30    2    2    3    6 


In [14]:
## special shortcut in purrr: a one-sided formula stands in for the function
# (.x refers to the current element)
print(map_dbl(mtcars, ~ length(unique(.x))))

 mpg  cyl disp   hp drat   wt qsec   vs   am gear carb 
  25    3   27   22   22   29   30    2    2    3    6 


In [15]:
### Reason:
# purrr functions translate formulas,
# created by ~ (pronounced “twiddle”), into functions

# behind the scenes:
as_mapper(~ length(unique(.x)))

# => inside the formula you can refer to
# - .  for one-argument functions,
# - .x and .y for two-argument functions,
# - ..1, ..2, ..3, ... for functions with an arbitrary number of args



In [16]:
## for generating random data

# The formula never uses its input, so each of the 3 elements is a separate
# runif(2) draw
x <- map(1:3, ~ runif(2))
str(x)

List of 3
 $ : num [1:2] 0.35 0.515
 $ : num [1:2] 0.706 0.767
 $ : num [1:2] 0.504 0.662


In [17]:
## Passing arguments with ...

# Verbose version: wrap the call in an anonymous function to set na.rm
x <- list(1:5, c(1:10, NA))
map_dbl(x, ~ mean(.x, na.rm = TRUE))

## Simpler version
# map_dbl() forwards any additional arguments (...) to the function itself
x <- list(1:5, c(1:10, NA))
map_dbl(x, mean, na.rm = TRUE)


