/
04_Data-structure.R
115 lines (97 loc) · 3.72 KB
/
04_Data-structure.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
## ----eval=FALSE----------------------------------------------------------
## library(DBI)
## con <- dbConnect(RSQLite::SQLite(), "test.db") # create an empty db
## dbWriteTable(con, "mtcars", mtcars) # copy a table to the database
##
## dbListFields(con, "mtcars")
## dbReadTable(con, "mtcars") # retrieve data from the data base
##
## # You can fetch all results:
## res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4")
## dbFetch(res)
## dbDisconnect(con)
## ----eval=FALSE----------------------------------------------------------
## library(dplyr)
##
## # project and rename columns
## my_iris <- select( iris
## , sepal_width = Sepal.Width
## , species = Species
## )
##
## # select rows
## my_iris <- filter(my_iris, sepal_width > 3)
##
## # group by
## my_iris <- group_by(my_iris, species)
##
## summarize( my_iris, count = n()
## , mean=mean(sepal_width)
## ) # count total
## ----eval=FALSE----------------------------------------------------------
## library(dplyr)
##
## iris %>% # data set
## select( sepal_width = Sepal.Width # project and rename columns
## , species = Species) %>%
## filter( sepal_width > 3) %>% # select rows
## group_by(species) %>% # group by
## summarize( count = n() # calculate aggregates
## , mean=mean(sepal_width)
## ) # count total
## ------------------------------------------------------------------------
A <- matrix(1:6, nrow=3, dimnames=list(NULL, c("a", "b")))
as.data.frame(A)
as.matrix(as.data.frame(A))
## ------------------------------------------------------------------------
dat <- data.frame(num=1:2, text=c("a", "b"))
M <- as.matrix(dat)
M
as.data.frame(M)
## ----message=FALSE-------------------------------------------------------
str(Nile)
start(Nile)
end(Nile)
frequency(Nile) #annual data so frequency == 1
## ------------------------------------------------------------------------
data <- rnorm(3*4) # 3 years
(quarter <- ts(data = data, start = 2012, frequency = 4))
## ----echo=FALSE, fig.cap="UK lung deaths", fig.height=6, fig.lp="fig:ldeath"----
par(mfrow=c(2,1))
plot(ldeaths, las=1, ylab="", xlab="") #, main="UK lung deaths")
monthplot(ldeaths, las=1, ylab="")#, main="UK lung deaths p/month")
## ------------------------------------------------------------------------
data <- matrix(rnorm(2*4), ncol=2, dimnames=list(NULL, c("a","b")))
(quarter <- ts(data = data, start = 2012, frequency = 4))
## ----echo=FALSE, fig.cap="directed graph", message=FALSE-----------------
library(igraph)
g <- graph(c(1,2, 1,3, 3,4))
V(g)$label <- c("a","b","c","d")
plot(g, vertex.color="white", edge.color="black", layout=layout.reingold.tilford)
## ----message=FALSE-------------------------------------------------------
library(jsonlite)
fromJSON("http://api.census.gov/data/2000/sf1/tags.json")
fromJSON("http://api.worldbank.org/countries/bra?format=json")[[2]]
## ----include=FALSE-------------------------------------------------------
library(tidyr)
library(dplyr)
## ------------------------------------------------------------------------
library(tidyr)
library(dplyr)
finance_messy <- data.frame(
name = c("Alice", "Bob", "Carla"),
tax_2015 = c(40, 10, 2),
income_2015 = c(41, 90, 100)
)
finance_messy
## ------------------------------------------------------------------------
finance_messy %>%
gather(variable, amount, -name)
## ------------------------------------------------------------------------
finance_tidy <-
finance_messy %>%
gather(variable, amount, -name) %>%
separate(variable, into = c("variable", "year"))
## ------------------------------------------------------------------------
finance_tidy %>%
spread(variable, amount)