-
Notifications
You must be signed in to change notification settings - Fork 6
/
ggcoefplot.R
319 lines (315 loc) · 13.6 KB
/
ggcoefplot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
#' @title Draw coefficient plots and interaction plots from `fixest` regression
#' objects.
#'
#' @description Draws the `ggplot2` equivalents of `fixest::coefplot` and
#' `fixest::iplot`. These "gg" versions do their best to recycle the same
#' arguments and plotting logic as their original base counterparts. But they
#' also support additional features via the `ggplot2` API and infrastructure.
#' The overall goal remains the same as the original functions. To wit:
#' `ggcoefplot` plots the results of estimations (coefficients and confidence
#' intervals). The function `ggiplot` restricts the output to variables
#' created with `i`, either interactions with factors or raw factors.
#' @md
#' @param object A model object of class `fixest` or `fixest_multi`, or a list
#' thereof.
#' @param geom_style Character string. One of `c('pointrange', 'errorbar', 'ribbon')`
#' describing the preferred geometric representation of the coefficients. Note
#' that ribbon plots are not supported for `ggcoefplot`, since we cannot guarantee
#' a continuous relationship among the coefficients.
#' @param multi_style Character string. One of `c('dodge', 'facet')`, defining
#' how multi-model objects should be presented.
#' @param aggr_eff A keyword string or numeric sequence, indicating whether
#' mean treatment effects for some subset of the model should be displayed as
#' part of the plot. For example, the "post" keyword means that the mean
#' post-treatment effect will be plotted alongside the individual period
#' effects. Passed to [`aggr_es`]; see that function's documentation for other
#' valid options.
#' @param aggr_eff.par List. Parameters of the aggregated treatment effect line,
#' if plotted. The default values are `col = 'gray50'`, `lwd = 1`, `lty = 1`.
#' @param facet_args A list of arguments passed down to `ggplot2::facet_wrap()`.
#' E.g. `facet_args = list(ncol = 2, scales = 'free_y')`. Only used if
#' `multi_style = 'facet'`.
#' @param theme ggplot2 theme. Defaults to `theme_linedraw()` with some minor
#' adjustments, such as centered plot title. Can also be defined on an
#' existing ggiplot object to redefine theme elements. See examples.
#' @param ... Arguments passed down to, or equivalent to, the corresponding
#' `fixest::coefplot`/`fixest::iplot` arguments. Note that some of these
#' require list objects. Currently used are:
#' * `keep` and `drop` for subsetting variables using regular expressions. The `fixest::iplot` help page includes more detailed examples, but these should generally work as you expect. One useful regexp trick worth mentioning briefly for event studies with many pre-/post-periods is `drop = "[[:digit:]]{2}"`. This will cause the plot to zoom in around single digit pre-/post-periods.
#' * `group` a list indicating variables to group over. Each element of the list reports the coefficients to be grouped while the name of the element is the group name. Each element of the list can be either: i) a character vector of length 1, ii) of length 2, or iii) a numeric vector. Special patterns such as "^^var_start" can be used for more appealing plotting, where group labels are separated from their subsidiary labels. This can be especially useful for plotting interaction terms. See the Details section of `fixest::coefplot` for more information.
#' * `i.select` Integer scalar, default is 1. In `ggiplot`, used to select which variable created with `i()` to plot. Only used when there are several variables created with `i`. See the Details section of `fixest::iplot` for more information.
#' * `main`, `xlab`, and `ylab` for setting the plot title, x- and y-axis labels, respectively.
#' * `zero` and `zero.par` for defining or adjusting the zero line. For
#' example, `zero.par = list(col = 'orange')`.
#' * `ref.line` and `ref.line.par` for defining or adjusting the vertical
#' reference line. For example, `ref.line.par = list(col = 'red', lty = 4)`.
#' * `pt.pch` and `pt.join` for overriding the default point estimate shapes and joining them, respectively.
#' * `col` for manually defining line, point, and ribbon colours.
#' * `ci_level` for changing the desired confidence level (default = 0.95).
#' Note that multiple levels are allowed, e.g. `ci_level = c(0.8, 0.95)`.
#' * `ci.width` for changing the width of the extremities of the confidence
#' intervals. Only used if `geom_style = "errorbar"` (or if multiple CI levels
#' are requested for the default pointrange style). The default value is 0.2.
#' * `ci.fill.par` for changing the confidence interval fill. Only used when
#' `geom_style = "ribbon"` and currently only affects the alpha (transparency)
#' channel. For example, we can make the CI band lighter with
#' `ci.fill.par = list(alpha = 0.2)` (the default alpha is 0.3).
#' * `dict` a dictionary for overriding coefficient names.
#' * `vcov`, `cluster` or `se` as alternative options for adjusting the
#' standard errors of the model object(s) on the fly. See `summary.fixest` for
#' details. Written here in superseding order; `cluster` will only be
#' considered if `vcov` is null, etc.
#' @details These functions generally try to mimic the functionality and (where
#' appropriate) arguments of `fixest::coefplot` and `fixest::iplot` as
#' closely as possible. However, by leveraging the ggplot2 API and
#' infrastructure, they are able to support some more complex plot
#' arrangements out-of-the-box that would be more difficult to achieve using
#' the base `coefplot`/`iplot` alternatives.
#' @seealso [fixest::coefplot()], [fixest::iplot()].
#' @return A ggplot2 object.
#' @import ggplot2 fixest
#' @export
#' @examples
#' library(ggfixest)
#'
#' ##
#' # Author note: The examples that follow deliberately follow the original
#' # examples from the coefplot/iplot help pages. A few "gg-" specific
#' # features are sprinkled within, with the final set of examples in
#' # particular highlighting unique features of this package.
#'
#'
#' #
#' # Example 1: Basic use and stacking two sets of results on the same graph
#' #
#'
#' # Estimation on Iris data with one fixed-effect (Species)
#' est = feols(Petal.Length ~ Petal.Width + Sepal.Length + Sepal.Width | Species, iris)
#'
#' ggcoefplot(est)
#'
#' # Show multiple CIs
#' ggcoefplot(est, ci_level = c(0.8, 0.95))
#'
#' # By default, fixest model standard errors are clustered by the first fixed
#' # effect (here: Species).
#' # But we can easily switch to "regular" standard-errors
#' est_std = summary(est, se = "iid")
#'
#' # You can plot both results at once in the same plot frame...
#' ggcoefplot(list("Clustered" = est, "IID" = est_std))
#' # ... or as separate facets
#' ggcoefplot(list("Clustered" = est, "IID" = est_std), multi_style = "facet") +
#' theme(legend.position = "none")
#'
#'
#' #
#' # Example 2: Interactions
#' #
#'
#'
#' # Now we estimate and plot the "yearly" treatment effects
#'
#' data(base_did)
#' base_inter = base_did
#'
#' # We interact the variable 'period' with the variable 'treat'
#' est_did = feols(y ~ x1 + i(period, treat, 5) | id + period, base_inter)
#'
#' # In the estimation, the variable treat is interacted
#' # with each value of period but 5, set as a reference
#'
#' # ggcoefplot will show all the coefficients:
#' ggcoefplot(est_did)
#'
#'
#' # Note that the grouping of the coefficients is due to 'group = "auto"'
#'
#' # If you want to keep only the coefficients
#' # created with i() (ie the interactions), use ggiplot
#' ggiplot(est_did)
#'
#' # We can see that the graph is different from before:
#' # - only interactions are shown,
#' # - the reference is present,
#' # => this is fully flexible
#'
#' ggiplot(est_did, ci_level = c(0.8, 0.95))
#' ggiplot(est_did, ref.line = FALSE, pt.join = TRUE, geom_style = "errorbar")
#' ggiplot(est_did, geom_style = "ribbon", col = "orange")
#' # etc
#'
#' # We can also use a dictionary to replace label values. The dictionary should
#' # take the form of a named vector or list, e.g. c("old_lab1" = "new_lab1", ...)
#'
#' # Let's create a "month" variable
#' all_months = c("aug", "sept", "oct", "nov", "dec", "jan",
#' "feb", "mar", "apr", "may", "jun", "jul")
#' # Turn into a dictionary by providing the old names
#' # Note the implication that treatment occurred here in December (the 5th month in our series)
#' dict = all_months; names(dict) = 1:12
#' # Pass our new dictionary to our ggiplot call
#' ggiplot(est_did, pt.join = TRUE, geom_style = "errorbar", dict = dict)
#'
#' #
#' # What if the interacted variable is not numeric?
#'
#' # let's re-use our all_months vector from the previous example, but add it
#' # directly to the dataset
#' base_inter$period_month = all_months[base_inter$period]
#'
#' # The new estimation
#' est = feols(y ~ x1 + i(period_month, treat, "oct") | id+period, base_inter)
#' # Since 'period_month' is of type character, iplot/coefplot both sort it
#' ggiplot(est)
#'
#' # To respect a plotting order, use a factor
#' base_inter$month_factor = factor(base_inter$period_month, levels = all_months)
#' est = feols(y ~ x1 + i(month_factor, treat, "oct") | id + period, base_inter)
#' ggiplot(est)
#'
#' # dict -> c("old_name" = "new_name")
#' dict = all_months; names(dict) = 1:12; dict
#' ggiplot(est_did, dict = dict)
#'
#' #
#' # Example 3: Setting defaults
#' #
#'
#' # The customization logic of ggcoefplot/ggiplot works differently than the
#' # original base fixest counterparts, so we don't have "gg" equivalents of
#' # setFixest_coefplot and setFixest_iplot. However, you can still invoke some
#' # global fixest settings like setFixest_dict(). Simple example:
#'
#' base_inter$letter = letters[base_inter$period]
#' est_letters = feols(y ~ x1 + i(letter, treat, 'e') | id+letter, base_inter)
#'
#' # Set global dictionary for capitalising the letters
#' dict = LETTERS[1:10]; names(dict) = letters[1:10]
#' setFixest_dict(dict)
#'
#' ggiplot(est_letters)
#'
#' setFixest_dict() # reset
#'
#' #
#' # Example 4: group + cleaning
#' #
#'
#' # You can use the argument group to group variables
#' # You can further use the special character "^^" to clean
#' # the beginning of the coef. name: particularly useful for factors
#'
#' est = feols(Petal.Length ~ Petal.Width + Sepal.Length +
#' Sepal.Width + Species, iris)
#'
#' # No grouping:
#' ggcoefplot(est)
#'
#' # now we group by Sepal and Species
#' ggcoefplot(est, group = list(Sepal = "Sepal", Species = "Species"))
#'
#' # now we group + clean the beginning of the names using the special character ^^
#' ggcoefplot(est, group = list(Sepal = "^^Sepal.", Species = "^^Species"))
#'
#'
#' #
#' # Example 5: Some more ggcoefplot/ggiplot extras
#' #
#'
#' # We'll demonstrate using the staggered treatment example from the
#' # introductory fixest vignette.
#'
#' data(base_stagg)
#' est_twfe = feols(
#' y ~ x1 + i(time_to_treatment, treated, ref = c(-1, -1000)) | id + year,
#' base_stagg
#' )
#' est_sa20 = feols(
#' y ~ x1 + sunab(year_treated, year) | id + year,
#' data = base_stagg
#' )
#'
#' # Plot both regressions together in the same (dodged) plot
#' ggiplot(
#' list('TWFE' = est_twfe, 'Sun & Abraham (2020)' = est_sa20),
#' main = 'Staggered treatment', ref.line = -1, pt.join = TRUE
#' )
#'
#' # So far that's no different than base iplot (automatic legend aside). But an
#' # area where ggiplot shines is in complex multiple estimation cases, such as
#' # lists of fixest_multi objects. To illustrate, let's add a split variable
#' # (group) to our staggered dataset.
#' base_stagg_grp = base_stagg
#' base_stagg_grp$grp = ifelse(base_stagg_grp$id %% 2 == 0, 'Evens', 'Odds')
#'
#' # Now re-run our two regressions from earlier, but splitting the sample to
#' # generate fixest_multi objects.
#' est_twfe_grp = feols(
#' y ~ x1 + i(time_to_treatment, treated, ref = c(-1, -1000)) | id + year,
#' data = base_stagg_grp, split = ~ grp
#' )
#' est_sa20_grp = feols(
#' y ~ x1 + sunab(year_treated, year) | id + year,
#' data = base_stagg_grp, split = ~ grp
#' )
#'
#' # ggiplot combines the list of multi-estimation objects without a problem...
#' ggiplot(list('TWFE' = est_twfe_grp, 'Sun & Abraham (2020)' = est_sa20_grp),
#' ref.line = -1, main = 'Staggered treatment: Split multi-sample')
#'
#' # ... but is even better when we use facets instead of dodged errorbars.
#' # Let's use this as an opportunity to construct a fancy plot that invokes some
#' # additional arguments and ggplot theming.
#' ggiplot(
#' list('TWFE' = est_twfe_grp, 'Sun & Abraham (2020)' = est_sa20_grp),
#' ref.line = -1,
#' main = 'Staggered treatment: Split multi-sample',
#' xlab = 'Time to treatment',
#' multi_style = 'facet',
#' geom_style = 'ribbon',
#' facet_args = list(labeller = labeller(id = \(x) gsub(".*: ", "", x))),
#' theme = theme_minimal() +
#' theme(
#' text = element_text(family = 'HersheySans'),
#' plot.title = element_text(hjust = 0.5),
#' legend.position = 'none'
#' )
#' )
#'
#' #
#' # Aside on theming and scale adjustments
#' #
#'
#' # Setting the theme inside the `ggiplot()` call is optional and not strictly
#' # necessary, since the ggplot2 API allows programmatic updating of existing
#' # plots. E.g.
#' last_plot() +
#' labs(caption = 'Note: Super fancy plot brought to you by ggiplot')
#' last_plot() +
#' theme_grey() +
#' theme(legend.position = 'none') +
#' scale_fill_brewer(palette = 'Set1', aesthetics = c("colour", "fill"))
#' # etc.
#'
#' @export
ggcoefplot = function(
  object,
  geom_style = c('pointrange', 'errorbar'),
  multi_style = c('dodge', 'facet'),
  facet_args = NULL,
  theme = NULL,
  ...
) {
  # Thin wrapper: all of the plotting work is delegated to ggiplot().
  #
  # Resolve the enumerated arguments up front, so that an invalid choice
  # errors here (against this function's own set of choices) before anything
  # is handed on. Note that 'ribbon' is intentionally not among the
  # `geom_style` choices for this function.
  chosen_geom = match.arg(geom_style)
  chosen_multi = match.arg(multi_style)

  # `is_iplot = FALSE` is forwarded as-is; presumably it switches ggiplot()
  # from its i()-only behaviour to plotting all coefficients -- confirm
  # against the ggiplot() definition. Everything in `...` is passed through
  # untouched.
  ggiplot(
    object = object, geom_style = chosen_geom, multi_style = chosen_multi,
    facet_args = facet_args, theme = theme, is_iplot = FALSE, ...
  )
}