/
ConditionalSmoothedScatterPlot.R
139 lines (127 loc) · 5.06 KB
/
ConditionalSmoothedScatterPlot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
check_align = function(arg) {
if(!(arg %in% c("center", "left", "right")))
stop(paste("align must be one of 'center','left', or 'right'"))
}
make_window = function(i, k, n, align="center") {
switch(align,
"center" = {halfwin = (k-1)/2
begin = i-halfwin
end = i+halfwin
c(begin, end)},
"left" = c(i-k+1, i),
"right" = c(i, i+k-1)
)
}
square_window_i = function(vec, i, k, align="center") {
window = make_window(i, k, length(vec), align)
begin=window[1]
end=window[2]
if(begin < 1 || end > length(vec)){
NA
}
else{
mean(vec[begin:end])
}
}
smoothing = function(frm, xvar, yvar, k, align) {
vec = frm[[yvar]]
vapply(seq_len(length(vec)),
FUN=function(i) {square_window_i(vec, i, k, align=align)},
numeric(1))
}
#' Plot a scatter plot with smoothing line.
#'
#' Plot a scatter plot with a smoothing line; the smoothing window is aligned either left, center or right.
#'
#' \code{xvar} is the continuous independent variable and \code{yvar} is the dependent binary variable.
#' Smoothing is by a square window of width \code{k}.
#'
#' If \code{palette} is NULL, and \code{groupvar} is non-NULL, plot colors will be chosen from the default ggplot2 palette.
#' Setting \code{palette} to NULL
#' allows the user to choose a non-Brewer palette, for example with \code{\link[ggplot2:scale_manual]{scale_fill_manual}}.
#'
#' @param frame data frame to get values from
#' @param xvar name of the independent column in frame. Assumed to be regularly spaced
#' @param yvar name of the dependent (output or result to be modeled) column in frame
#' @param groupvar name of the grouping column in frame. Can be NULL for an unconditional plot
#' @param title title for plot
#' @param ... no unnamed argument, added to force named binding of later arguments.
#' @param k width of smoothing window. Must be odd for a center-aligned plot. Defaults to 3
#' @param align smoothing window alignment: 'center', 'left', or 'right'. Defaults to 'center'
#' @param point_color color of points, when groupvar is NULL. Set to NULL to turn off points.
#' @param point_alpha alpha/opaqueness of points.
#' @param smooth_color color of smoothing line, when groupvar is NULL
#' @param palette name of Brewer palette, when groupvar is non-NULL (can be NULL)
#' @examples
#'
#' y = c(1,2,3,4,5,10,15,18,20,25)
#' x = seq_len(length(y))
#' df = data.frame(x=x, y=y, group=x>5)
#' WVPlots::ConditionalSmoothedScatterPlot(df, "x", "y", NULL,
#' title="left smooth, one group", align="left")
#' WVPlots::ConditionalSmoothedScatterPlot(df, "x", "y", "group",
#' title="left smooth, two groups", align="left")
#'
#' @export
ConditionalSmoothedScatterPlot = function(frame, xvar, yvar,
groupvar = NULL,
title = 'ConditionalSmoothedScatterPlot',
...,
k=3, align="center",
point_color="black",
point_alpha=0.2,
smooth_color="black",
palette="Dark2") {
vlist <- list(xvar = xvar, yvar = yvar)
if(!is.null(groupvar)) {
vlist$groupvar <- groupvar
}
frame <- as.data.frame(frame)
check_frame_args_list(...,
frame = frame,
name_var_list = vlist,
title = title,
funname = "WVPlots::ConditionalSmoothedScatterPlot")
if(!is.null(groupvar)) {
if(!isDiscrete(frame[[groupvar]])) {
stop(paste(groupvar, "should be discrete (factor, character, integer, or logical)"))
}
}
check_align(align)
if((k%%2)==0 && align=="center") {stop("For centered windows, k must be odd")}
# sort the frame by x
ord = order(frame[[xvar]])
frame = frame[ord,]
fs = frame
fs$smooth=0
if(is.null(groupvar)) {
fs$smooth = smoothing(fs, xvar, yvar, k, align)
fs = fs[!is.na(fs$smooth), ]
p = ggplot2::ggplot()
if(!is.null(point_color)) {
p = p + ggplot2::geom_point(data=frame,
ggplot2::aes_string(x=xvar, y=yvar),
color=point_color,
alpha=point_alpha)
}
p = p + ggplot2::geom_line(data=fs, ggplot2::aes_string(x=xvar, y="smooth"), color=smooth_color)
} else{
gplist = unique(fs[[groupvar]])
for(gp in gplist) {
ix = fs[[groupvar]]==gp
fs$smooth[ix] = smoothing(fs[ix, ], xvar, yvar, k, align)
}
fs = fs[!is.na(fs$smooth),]
p = ggplot2::ggplot()
if(!is.null(point_color)) {
p = p + ggplot2::geom_point(data=frame,
ggplot2::aes_string(x=xvar, y=yvar, color=groupvar),
alpha=point_alpha)
}
p = p + ggplot2::geom_line(data=fs, ggplot2::aes_string(x=xvar,y="smooth",color=groupvar))
if(!is.null(palette)) {
p = p + ggplot2::scale_color_brewer(palette=palette)
}
}
p + ggplot2::ggtitle(title)
}