/
ggnetworkmap.R
450 lines (403 loc) · 15.4 KB
/
ggnetworkmap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
if (getRversion() >= "2.15.1") {
utils::globalVariables(c(
"lon", "lat", "group", "id",
"lon1", "lat1", "lon2", "lat2",
".label"
))
}
#' Network plot map overlay
#'
#' Plots a network with \pkg{ggplot2} suitable for overlay on a \pkg{ggmap} plot or \pkg{ggplot2}
#'
#' This is a descendant of the original \code{ggnet} function. \code{ggnet} added the innovation of plotting the network geographically.
#' However, \code{ggnet} needed to be the first object in the ggplot chain. \code{ggnetworkmap} does not. If passed a \code{ggplot} object as its first argument,
#' such as output from \code{ggmap}, \code{ggnetworkmap} will plot on top of that chart, looking for vertex attributes \code{lon} and \code{lat} as coordinates.
#' Otherwise, \code{ggnetworkmap} will generate coordinates using the Fruchterman-Reingold algorithm.
#'
#' @export
#' @param gg an object of class \code{ggplot}.
#' @param net an object of class \code{\link[network]{network}}, or any object
#' that can be coerced to this class, such as an adjacency or incidence matrix,
#' or an edge list: see \link[network]{edgeset.constructors} and
#' \link[network]{network} for details. If the object is of class
#' [igraph][igraph::igraph-package] and the
#' [intergraph][intergraph::intergraph-package] package is installed,
#' it will be used to convert the object: see
#' \code{\link[intergraph]{asNetwork}} for details.
#' @param size size of the network nodes. Defaults to 3. If the nodes are weighted, their area is proportionally scaled up to the size set by \code{size}.
#' @param alpha a level of transparency for nodes, vertices and arrows. Defaults to 0.75.
#' @param weight if present, the unquoted name of a vertex attribute in \code{data}. Otherwise nodes are unweighted.
#' @param node.group \code{NULL}, the default, or the unquoted name of a vertex attribute that will be used to determine the color of each node.
#' @param ring.group if not \code{NULL}, the default, the unquoted name of a vertex attribute that will be used to determine the color of each node border.
#' @param node.color If \code{node.group} is null, a character string specifying a color.
#' @param node.alpha transparency of the nodes. Inherits from \code{alpha}.
#' @param segment.alpha transparency of the vertex links. Inherits from \code{alpha}
#' @param segment.color color of the vertex links. Defaults to \code{"grey"}.
#' @param segment.size size of the vertex links, as a vector of values or as a single value. Defaults to 0.25.
#' @param great.circles whether to draw edges as great circles using the \code{geosphere} package. Defaults to \code{FALSE}
#' @param arrow.size size of the vertex arrows for directed network plotting, in centimeters. Defaults to 0.
#' @param label.nodes label nodes with their vertex names attribute. If set to \code{TRUE}, all nodes are labelled. Also accepts a vector of character strings to match with vertex names.
#' @param label.size size of the labels. Defaults to \code{size / 2}.
#' @param ... other arguments supplied to geom_text for the node labels. Arguments pertaining to the title or other items can be achieved through \pkg{ggplot2} methods.
#' @author Amos Elberg. Original by Moritz Marbach, Francois Briatte
#' @details This is a function for plotting graphs generated by \code{network} or \code{igraph} in a more flexible and elegant manner than permitted by ggnet. The function does not need to be the first plot in the ggplot chain, so the graph can be plotted on top of a map or other chart. Segments can be straight lines, or plotted as great circles. Note that the great circles feature can produce odd results with arrows and with vertices beyond the plot edges; this is a \pkg{ggplot2} limitation and cannot yet be fixed. Nodes can have two color schemes, which are then plotted as the center and ring around the node. The color schemes are selected by adding scale_fill_ or scale_color_ just like any other \pkg{ggplot2} plot. If there are no rings, scale_color sets the color of the nodes. If there are rings, scale_color sets the color of the rings, and scale_fill sets the color of the centers. Note that additional arguments in the ... are passed to geom_text for plotting labels.
#' @importFrom utils installed.packages
#' @examples
#' # small function to display plots only if it's interactive
#' p_ <- GGally::print_if_interactive
#'
#' invisible(lapply(c("ggplot2", "maps", "network", "sna"), base::library, character.only = TRUE))
#'
#' ## Example showing great circles on a simple map of the USA
#' ## http://flowingdata.com/2011/05/11/how-to-map-connections-with-great-circles/
#' \donttest{
#' airports <- read.csv("http://datasets.flowingdata.com/tuts/maparcs/airports.csv", header = TRUE)
#' rownames(airports) <- airports$iata
#'
#' # select some random flights
#' set.seed(123)
#' flights <- data.frame(
#' origin = sample(airports[200:400, ]$iata, 200, replace = TRUE),
#' destination = sample(airports[200:400, ]$iata, 200, replace = TRUE)
#' )
#'
#' # convert to network
#' flights <- network(flights, directed = TRUE)
#'
#' # add geographic coordinates
#' flights %v% "lat" <- airports[network.vertex.names(flights), "lat"]
#' flights %v% "lon" <- airports[network.vertex.names(flights), "long"]
#'
#' # drop isolated airports
#' delete.vertices(flights, which(degree(flights) < 2))
#'
#' # compute degree centrality
#' flights %v% "degree" <- degree(flights, gmode = "digraph")
#'
#' # add random groups
#' flights %v% "mygroup" <- sample(letters[1:4], network.size(flights), replace = TRUE)
#'
#' # create a map of the USA
#' usa <- ggplot(map_data("usa"), aes(x = long, y = lat)) +
#' geom_polygon(aes(group = group),
#' color = "grey65",
#' fill = "#f9f9f9", linewidth = 0.2
#' )
#'
#' # overlay network data to map
#' p <- ggnetworkmap(
#' usa, flights,
#' size = 4, great.circles = TRUE,
#' node.group = mygroup, segment.color = "steelblue",
#' ring.group = degree, weight = degree
#' )
#' p_(p)
#'
#' ## Exploring a community of spambots found on Twitter
#' ## Data by Amos Elberg: see ?twitter_spambots for details
#'
#' data(twitter_spambots)
#'
#' # create a world map
#' world <- fortify(map("world", plot = FALSE, fill = TRUE))
#' world <- ggplot(world, aes(x = long, y = lat)) +
#' geom_polygon(aes(group = group),
#' color = "grey65",
#' fill = "#f9f9f9", linewidth = 0.2
#' )
#'
#' # view global structure
#' p <- ggnetworkmap(world, twitter_spambots)
#' p_(p)
#'
#' # domestic distribution
#' p <- ggnetworkmap(net = twitter_spambots)
#' p_(p)
#'
#' # topology
#' p <- ggnetworkmap(net = twitter_spambots, arrow.size = 0.5)
#' p_(p)
#'
#' # compute indegree and outdegree centrality
#' twitter_spambots %v% "indegree" <- degree(twitter_spambots, cmode = "indegree")
#' twitter_spambots %v% "outdegree" <- degree(twitter_spambots, cmode = "outdegree")
#'
#' p <- ggnetworkmap(
#' net = twitter_spambots,
#' arrow.size = 0.5,
#' node.group = indegree,
#' ring.group = outdegree, size = 4
#' ) +
#' scale_fill_continuous("Indegree", high = "red", low = "yellow") +
#' labs(color = "Outdegree")
#' p_(p)
#'
#' # show some vertex attributes associated with each account
#' p <- ggnetworkmap(
#' net = twitter_spambots,
#' arrow.size = 0.5,
#' node.group = followers,
#' ring.group = friends,
#' size = 4,
#' weight = indegree,
#' label.nodes = TRUE, vjust = -1.5
#' ) +
#' scale_fill_continuous("Followers", high = "red", low = "yellow") +
#' labs(color = "Friends") +
#' scale_color_continuous(low = "lightgreen", high = "darkgreen")
#' p_(p)
#' }
#'
ggnetworkmap <- function(
gg,
net,
size = 3,
alpha = 0.75,
weight,
node.group,
node.color = NULL,
node.alpha = NULL,
ring.group,
segment.alpha = NULL,
segment.color = "grey",
great.circles = FALSE,
segment.size = 0.25,
arrow.size = 0,
label.nodes = FALSE,
label.size = size / 2,
...) {
require_namespaces(c("network", "sna"))
# sna # node placement if there is no ggplot object in function call
# -- conversion to network class ---------------------------------------------
if (inherits(net, "igraph") && "intergraph" %in% rownames(installed.packages())) {
net <- intergraph::asNetwork(net)
} else if (inherits(net, "igraph")) {
stop("install the 'intergraph' package to use igraph objects with ggnet")
}
if (!network::is.network(net)) {
net <- try(network::network(net), silent = TRUE)
}
if (!network::is.network(net)) {
stop("could not coerce net to a network object")
}
# -- network functions -------------------------------------------------------
get_v <- utils::getFromNamespace("%v%", ns = "network")
# -- network structure -------------------------------------------------------
vattr <- network::list.vertex.attributes(net)
is_dir <- ifelse(network::is.directed(net), "digraph", "graph")
if (!is.numeric(arrow.size) || arrow.size < 0) {
stop("incorrect arrow.size value")
} else if (arrow.size > 0 && is_dir == "graph") {
warning("network is undirected; arrow.size ignored")
arrow.size <- 0
}
if (network::is.hyper(net)) {
stop("ggnetworkmap cannot plot hyper graphs")
}
if (network::is.multiplex(net)) {
stop("ggnetworkmap cannot plot multiplex graphs")
}
if (network::has.loops(net)) {
warning("ggnetworkmap does not know how to handle self-loops")
}
# -- ... -------------------------------------------------------
# get arguments
labels <- label.nodes
# alpha default
inherit <- function(x) ifelse(is.null(x), alpha, x)
# get sociomatrix
m <- network::as.matrix.network.adjacency(net)
if (missing(gg)) {
# mapproj doesn't need to be loaded, but
# it needs to exist for ggplot2::coord_map() to work properly
if (!("mapproj" %in% installed.packages())) {
require_namespaces("mapproj")
}
gg <- ggplot() +
coord_map()
plotcord <- sna::gplot.layout.fruchtermanreingold(net, list(m, layout.par = NULL))
plotcord <- data.frame(plotcord)
colnames(plotcord) <- c("lon", "lat")
} else {
plotcord <- data.frame(
lon = as.numeric(get_v(net, "lon")),
lat = as.numeric(get_v(net, "lat"))
)
}
# Correct vertex labels
if (!is.logical(labels)) {
stopifnot(length(labels) == nrow(plotcord))
plotcord$.label <- labels
} else if ("id" %in% vattr) {
plotcord$.label <- as.character(get_v(net, "id"))
} else if ("vertex.names" %in% vattr) {
plotcord$.label <- network::network.vertex.names(net)
}
point_aes <- list(
x = substitute(lon),
y = substitute(lat)
)
point_args <- list(
alpha = substitute(inherit(node.alpha))
)
# get node groups
if (!missing(node.group)) {
plotcord$.ngroup <- get_v(net, as.character(substitute(node.group)))
if (missing(ring.group)) {
point_aes$color <- substitute(.ngroup)
} else {
point_aes$fill <- substitute(.ngroup)
}
} else if (!missing(node.color)) {
point_args$color <- substitute(node.color)
} else {
point_args$color <- substitute("black")
}
# rings
if (!missing(ring.group)) {
plotcord$.rgroup <- get_v(net, as.character(substitute(ring.group)))
point_aes$color <- substitute(.rgroup)
point_args$pch <- substitute(21)
}
#
#
# Plot edges
#
#
# get edgelist
edges <- network::as.matrix.network.edgelist(net)
edges <- data.frame(
lat1 = plotcord[edges[, 1], "lat"],
lon1 = plotcord[edges[, 1], "lon"],
lat2 = plotcord[edges[, 2], "lat"],
lon2 = plotcord[edges[, 2], "lon"]
)
edges <- subset(na.omit(edges), (!(lat1 == lat2 & lon2 == lon2)))
edge_args <- list(
linewidth = substitute(segment.size),
alpha = substitute(inherit(segment.alpha)),
color = substitute(segment.color)
)
edge_aes <- list()
# -- edge arrows -------------------------------------------------------------
if (!missing(arrow.size) && arrow.size > 0) {
edge_args$arrow <- substitute(arrow(
type = "closed",
length = unit(arrow.size, "cm")
))
}
# -- great circles -----------------------------------------------------------
if (great.circles) {
# geosphere # great circles
require_namespaces("geosphere")
pts <- 25 # number of intermediate points for drawing great circles
i <- 0 # used to keep track of groups when getting intermediate points for great circles
edges <- ddply(
.data = edges,
.variables = c("lat1", "lat2", "lon1", "lon2"),
.parallel = FALSE,
.fun = function(x) {
p1Mat <- x[, c("lon1", "lat1")]
colnames(p1Mat) <- NULL
p2Mat <- x[, c("lon2", "lat2")]
colnames(p2Mat) <- NULL
inter <- geosphere::gcIntermediate(
p1 = p1Mat,
p2 = p2Mat,
n = pts,
addStartEnd = TRUE,
breakAtDateLine = TRUE
)
if (!is.list(inter)) {
i <<- i + 1
inter <- data.frame(inter)
inter$group <- i
return(inter)
} else {
if (is.matrix(inter[[1]])) {
i <<- i + 1
ret <- data.frame(inter[[1]])
ret$group <- i
i <<- i + 1
ret2 <- data.frame(inter[[2]])
ret2$group <- i
return(rbind(ret, ret2))
} else {
ret <- data.frame(lon = numeric(0), lat = numeric(0), group = numeric(0))
for (j in 1:length(inter)) {
i <<- i + 1
ret1 <- data.frame(inter[[j]][[1]])
ret1$group <- i
i <<- i + 1
ret2 <- data.frame(inter[[j]][[2]])
ret2$group <- i
ret <- rbind(ret, ret1, ret2)
}
return(ret)
}
}
}
)
edge_aes$x <- substitute(lon)
edge_aes$y <- substitute(lat)
edge_aes$group <- substitute(group)
edge_args$data <- substitute(edges)
edge_args$mapping <- do.call(aes, edge_aes)
gg <- gg + do.call(geom_path, edge_args)
} else {
edge_aes$x <- substitute(lon1)
edge_aes$y <- substitute(lat1)
edge_aes$xend <- substitute(lon2)
edge_aes$yend <- substitute(lat2)
edge_args$data <- substitute(edges)
edge_args$mapping <- do.call(aes, edge_aes)
gg <- gg + do.call(geom_segment, edge_args)
}
#
#
# Done drawing edges, time to draws nodes
#
#
# custom weights: vertex attribute
# null weighting
sizer <- NULL
if (missing(weight)) {
point_args$size <- substitute(size)
} else {
# Setup weight-sizing
plotcord$.weight <- get_v(net, as.character(substitute(weight)))
# proportional scaling
if (is.factor(plotcord$.weight)) {
sizer <- scale_size_discrete(name = substitute(weight), range = c(size / nlevels(plotcord$weight), size))
} else {
sizer <- scale_size_area(name = substitute(weight), max_size = size)
}
point_aes$size <- substitute(.weight)
}
# Add points to plot
point_args$data <- substitute(plotcord)
point_args$mapping <- do.call(aes, point_aes)
gg <- gg +
do.call(geom_point, point_args)
if (!is.null(sizer)) {
gg <- gg +
sizer
}
# -- node labels -------------------------------------------------------------
if (isTRUE(labels)) {
gg <- gg + geom_text(
data = plotcord,
aes(x = lon, y = lat, label = .label),
size = label.size, ...
)
}
gg <- gg +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
labs(color = "", fill = "", size = "", y = NULL, x = NULL) +
theme(
panel.background = element_blank(),
legend.key = element_blank()
)
return(gg)
}