In [1]:
# vectors
library(tidyverse)

-- [1mAttaching packages[22m --------------------------------------- tidyverse 1.2.1 --
[32mv[39m [34mggplot2[39m 3.2.1     [32mv[39m [34mpurrr  [39m 0.3.3
[32mv[39m [34mtibble [39m 3.0.1     [32mv[39m [34mdplyr  [39m 0.8.3
[32mv[39m [34mtidyr  [39m 1.0.0     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.3.1     [32mv[39m [34mforcats[39m 0.4.0
"package 'stringr' was built under R version 3.6.3"-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


In [2]:
# atomic vectors and lists is that atomic vectors are homogeneous, while lists can be heterogeneous

In [3]:
typeof(letters)

In [4]:
letters

In [7]:
length(letters)

In [8]:
c <- 1:10

In [14]:
c[c%%3==0]

In [17]:
#In R numbers are double by default
typeof(1)
typeof(1L)

In [24]:
# Integers have one special value: NA, while doubles have four: NA, NaN, Inf and -Inf
# Avoid using == to check for these other special values. 
# Instead use the helper functions is.finite(), is.infinite(), and is.nan(), is.na()
c(-1, 0, 1) / 0
is.na(NA)
is.na(NaN)

In [27]:
c(TRUE, F)

In [33]:
#to test type of vector use is_*() function
# is_logical()
# is_integer()
# is_character()
# is_numeric()
is_numeric(c(1,2,3,4,5))
is_integer(c(1,2,3,4,5))
is_integer(c(1L,2L,3L,4L,5L))
is_double(c(1,2,3,4,5))

"Deprecated"

In [35]:
# 					lgl		int		dbl		chr		list
# is_logical()		x				
# is_integer()				x			
# is_double()						x		
# is_numeric()				x		x		
# is_character()							x	
# is_atomic()		x		x		x		x	
# is_list()											x
# is_vector()		x		x		x		x		x

In [36]:
 rep(1:2, 2)

In [37]:
rep(1:2, each = 2)

In [38]:
tibble(x = 1:4, y = rep(1:2, 2))

x,y
<int>,<int>
1,1
2,2
3,1
4,2


In [44]:
# naming vectors
c <- c(x = 1, y = 2, z = 4)

In [61]:
#or 
set_names(1:3, c("a", "b", "c"))


In [62]:
# naming vectors good for subsetting
x <- c("one", "two", "three", "four", "five")
x[c(1, 1, 5, 5, 5, 2)]

In [63]:
# Negative values drop the elements at the specified positions:
x[c(-1, -3, -5)]

In [65]:
#subsetting named vectors
x <- c(abc = 1, def = 2, xyz = 5)
x[c("xyz", "def")]

In [69]:
x <- c("one", "two", "three", "four", "five")
x[]

In [71]:
# here is an important variation of [ called [[. [[ only ever extracts a single element, and always drops names.
# The distinction between [ and [[ is most important for lists

In [72]:
# List
x <- list(1, 2, 3)

In [75]:
x
str(x)
x_named <- list(a = 1, b = 2, c = 3)
str(x_named)

List of 3
 $ : num 1
 $ : num 2
 $ : num 3
List of 3
 $ a: num 1
 $ b: num 2
 $ c: num 3


In [94]:
x1 <- list(c(1, 2), c(3, 4))
x2 <- list(list(1, 2), list(3, 4))
x3 <- list(1, list(2, list(3)))
#all above are row based
x1
x2
x3


In [96]:
# SUBSETTING A LIST, 3 WAYS
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))

In [99]:
# [ extracts a sub-list. The result will always be a list
str(a[1:2])
str(a[4])

List of 2
 $ a: int [1:3] 1 2 3
 $ b: chr "a string"
List of 1
 $ d:List of 2
  ..$ : num -1
  ..$ : num -5


In [102]:
# [[ extracts a single component from a list. It removes a level of hierarchy from the list
str(a[1])
str(a[[1]])
str(a[4])
str(a[[4]])

List of 1
 $ a: int [1:3] 1 2 3
 int [1:3] 1 2 3
List of 1
 $ d:List of 2
  ..$ : num -1
  ..$ : num -5
List of 2
 $ : num -1
 $ : num -5


In [103]:
# $ is a shorthand for extracting named elements of a list. It works similarly to [[ except that you don’t need to use quotes.
a$a
a[["a"]]

# The distinction between [ and [[ is really important for lists, 
# because [[ drills down into the list while [ returns a new, smaller list

# Attributes
# Any vector can contain arbitrary additional metadata through its attributes. 
# You can think of attributes as named list of vectors that can be attached to any object. 
# You can get and set individual attribute values with attr() or see them all at once with attributes()

In [107]:
# Atomic vectors and lists are the building blocks for other important vector types like factors and dates. 
# I call these augmented vectors, because they are vectors with additional attributes, including class

In [108]:
# four important augmented vectors:

# Factors
# Dates
# Date-times
# Tibbles

In [110]:
# Factors
# Factors are designed to represent categorical data that can take a fixed set of possible values
x <- factor(c("ab", "cd", "ab"), levels = c("ab", "cd", "ef"))
typeof(x)
attributes(x)

In [112]:
# Dates and date-times
# Dates in R are numeric vectors that represent the number of days since 1 January 1970
x <- as.Date("1971-01-01")
unclass(x)
class(x)
typeof(x)
attributes(x)