-
Notifications
You must be signed in to change notification settings - Fork 9
/
datatable-faq.R
127 lines (106 loc) · 4 KB
/
datatable-faq.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
## ---- echo = FALSE, message = FALSE-------------------------------------------
library(data.table)
knitr::opts_chunk$set(
comment = "#",
error = FALSE,
tidy = FALSE,
cache = FALSE,
collapse = TRUE)
## -----------------------------------------------------------------------------
X = data.table(grp = c("a", "a", "b",
"b", "b", "c", "c"), foo = 1:7)
setkey(X, grp)
Y = data.table(c("b", "c"), bar = c(4, 2))
X
Y
X[Y, sum(foo*bar)]
X[Y, sum(foo*bar), by = .EACHI]
## -----------------------------------------------------------------------------
DF = data.frame(x = 1:3, y = 4:6, z = 7:9)
DF
DF[ , c("y", "z")]
## -----------------------------------------------------------------------------
DT = data.table(DF)
DT[ , c(y, z)]
## -----------------------------------------------------------------------------
DT[ , .(y, z)]
## -----------------------------------------------------------------------------
data.table(NULL)
data.frame(NULL)
as.data.table(NULL)
as.data.frame(NULL)
is.null(data.table(NULL))
is.null(data.frame(NULL))
## -----------------------------------------------------------------------------
DT = data.table(a = 1:3, b = c(4, 5, 6), d = c(7L,8L,9L))
DT[0]
sapply(DT[0], class)
## -----------------------------------------------------------------------------
DT = data.table(x = rep(c("a", "b"), c(2, 3)), y = 1:5)
DT
DT[ , {z = sum(y); z + 3}, by = x]
## -----------------------------------------------------------------------------
DT[ , {
cat("Objects:", paste(objects(), collapse = ","), "\n")
cat("Trace: x=", as.character(x), " y=", y, "\n")
sum(y)},
by = x]
## -----------------------------------------------------------------------------
DT[ , .(g = 1, h = 2, i = 3, j = 4, repeatgroupname = x, sum(y)), by = x]
DT[ , .(g = 1, h = 2, i = 3, j = 4, repeatgroupname = x[1], sum(y)), by = x]
## -----------------------------------------------------------------------------
A = matrix(1:12, nrow = 4)
A
## -----------------------------------------------------------------------------
A[c(1, 3), c(2, 3)]
## -----------------------------------------------------------------------------
B = cbind(c(1, 3), c(2, 3))
B
A[B]
## -----------------------------------------------------------------------------
rownames(A) = letters[1:4]
colnames(A) = LETTERS[1:3]
A
B = cbind(c("a", "c"), c("B", "C"))
A[B]
## -----------------------------------------------------------------------------
A = data.frame(A = 1:4, B = letters[11:14], C = pi*1:4)
rownames(A) = letters[1:4]
A
B
A[B]
## -----------------------------------------------------------------------------
B = data.frame(c("a", "c"), c("B", "C"))
cat(try(A[B], silent = TRUE))
## ---- eval = FALSE------------------------------------------------------------
# DT[where, select|update, group by][order by][...] ... [...]
## -----------------------------------------------------------------------------
DT = data.table(a = rep(1:3, 1:3), b = 1:6, c = 7:12)
DT
DT[ , { mySD = copy(.SD)
mySD[1, b := 99L]
mySD},
by = a]
## -----------------------------------------------------------------------------
DT = data.table(a = c(1,1,2,2,2), b = c(1,2,2,2,1))
DT
DT[ , list(.N = .N), list(a, b)] # show intermediate result for exposition
cat(try(
DT[ , list(.N = .N), by = list(a, b)][ , unique(.N), by = a] # compound query more typical
, silent = TRUE))
## -----------------------------------------------------------------------------
if (packageVersion("data.table") >= "1.8.1") {
DT[ , .N, by = list(a, b)][ , unique(N), by = a]
}
if (packageVersion("data.table") >= "1.9.3") {
DT[ , .N, by = .(a, b)][ , unique(N), by = a] # same
}
## -----------------------------------------------------------------------------
DT = data.table(a = 1:5, b = 1:5)
suppressWarnings(
DT[2, b := 6] # works (slower) with warning
)
class(6) # numeric not integer
DT[2, b := 7L] # works (faster) without warning
class(7L) # L makes it an integer
DT[ , b := rnorm(5)] # 'replace' integer column with a numeric column