Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
1359 lines (1148 sloc) 44.2 KB
---
title: "President's Day (As In: What Does President Trump Do With His Day?)"
twitterTitle: "President's Day"
author: Garrick Aden-Buie
date: '2019-02-27'
slug: presidents-day
aliases: /blog/2019/02/27/presidents-day
categories:
- Blog
tags:
- R
- rtweet
- ggplot2
- Visualization
- Politics
Keywords: ["rstats", "rtweet", "ggplot2", "tidyverse", "data cleaning", "data visualization", "politics", "trump", "tweets"]
Description: |
As in: What does Presdient Trump do with his day? (Spoiler: A whole lot of "executive time".)
twitterImage: "/blog/2019/2019-02-27_presidents-day_files/figure-html/plot-time-category-total-pct-1.png"
rmd_source: https://github.com/gadenbuie/garrickadenbuie-com/blob/master/content/blog/2019/2019-02-27_presidents-day.Rmd
head_custom: |
<link href="https://fonts.googleapis.com/css?family=PT+Mono" rel="stylesheet">
output:
blogdown::html_page:
section_divs: false
---
<!-- links -->
[axios-article]: https://www.axios.com/donald-trump-private-schedules-leak-executive-time-34e67fbb-3af6-48df-aefb-52e02c334255.html
[axios-spreadsheet]: https://docs.google.com/spreadsheets/d/1oITCuVsYdhNXtY7GElLelsrbjRRIPJ1ce-_v-8J1X_A/edit#gid=0
[axios-comparison]: https://www.axios.com/donald-trump-schedule-obama-bush-clinton-513c73ff-4d15-4e7a-9676-334557a894ee.html
[vox-exec-time]: https://www.vox.com/policy-and-politics/2019/2/4/18210345/trump-executive-time-axios-private-schedule-leak
[politico-exec-time]: https://www.politico.com/story/2018/10/29/trump-daily-schedule-executive-time-944996
[maelle-rectangle]: https://masalmon.eu/2019/02/11/trump-schedule/
[yonicd-app]: https://potus-private-schedule.herokuapp.com/
[plot.ly]: https://plot.ly
[tidyverse]: https://tidyverse.org
[so-rev-date]: https://stackoverflow.com/a/43626186
[tidyeval]: https://tidyeval.tidyverse.org/
[hrbrthemes]: https://hrbrmstr.github.io/hrbrthemes/
[rtweet]: https://rtweet.info
[so-plotly]: https://stackoverflow.com/a/45802923/2022615
```{r setup, include=FALSE}
knitr::opts_chunk$set(
echo = FALSE, warning = FALSE, message = FALSE,
fig.width = 9, fig.height = 10, fig.show = "hide"
)
options(htmltools.dir.version = TRUE)
```
```{r library, results="hide"}
library(tidyverse)
library(lubridate)
library(hrbrthemes)
library(showtext)
library(sysfonts)
library(glue)
```
```{r function-in_hours}
# Convert datetime to decimal hour of day
in_hours <- function(x) {
hour(x) + minute(x)/60 + second(x)/60^2
}
```
```{r function-executive_categories}
executive_categories <- c(
"executive_time" = "Executive Time",
"event" = "Event",
"lunch" = "Lunch",
"meeting" = "Meeting",
"no_data" = "Unknown",
"travel" = "Travel"
)
```
```{r function-filter_workday}
filter_workday <- function(
df,
start = hms::hms(0, 0, 8),
end = hms::hms(0, 0, 17),
start_var = time_start,
end_var = time_end
) {
start_var <- rlang::enquo(start_var)
end_var <- rlang::enquo(end_var)
df %>%
filter(
date(!!start_var) == date(!!end_var),
hms::as.hms(!!start_var) >= start,
hms::as.hms(!!end_var) <= end
)
}
```
```{r load-exec-time}
# Exec Time Downloaded from http://bit.ly/2Sk9Vj7
exec_time <-
read_csv(
here::here(
"_data", "trump-exec-time",
"axios_trump_schedule_2018-11-07--2019-02-02.csv"
),
col_types = cols(.default = col_character())
) %>%
mutate(
# Convert time start/end to datetime
event_id = row_number(),
time_start = paste(date, time_start),
time_end = paste(date, time_end),
time_start = ymd_hm(time_start, tz = "America/New_York"),
time_end = ymd_hm(time_end, tz = "America/New_York"),
time_end = if_else(time_start > time_end,
time_end + hours(24), time_end),
# Recode the activity category with nicer labels
top_category = factor(top_category,
levels = names(executive_categories),
labels = executive_categories)
) %>%
mutate(
# Create label pieces for plotly hover text
label_title = glue("<b>{top_category}</b>"),
has_uniq_title = tolower(top_category) != tolower(listed_title),
has_subtitle = has_uniq_title & !is.na(listed_title),
label_subtitle = if_else(has_subtitle,
glue("<br><em>{listed_title}</em>"), ""),
has_location = !is.na(listed_location),
label_location = if_else(has_location,
glue("<br><em>{listed_location}</em>"), ""),
label_time = glue(
"<br><br>{strftime(time_start, '%A, %B %e %H:%M')} ",
"to {strftime(time_end, '%H:%M')}"),
has_notes = !is.na(notes),
label_notes = if_else(has_notes, paste0("<br><br>", notes), ""),
# Compose final tooltip text
label = paste0(label_title, label_subtitle, label_location,
label_time, label_notes)
) %>%
mutate(
# truncate any activities that span 8am or 5pm
time_start = if_else(
in_hours(time_start) < 8 & in_hours(time_end) > 8,
floor_date(time_start, "day") + hours(8),
time_start
),
time_end = if_else(
in_hours(time_start) < 17 & in_hours(time_end) > 17,
floor_date(time_end, "day") + hours(17),
time_end
),
# create 5 minute increments "inside" each activity
time_inc = map2(time_start, time_end, seq, by = "5 mins")
) %>%
select(event_id, time_start, time_end, time_inc,
listed_title, top_category, label)
```
```{r get-djt-tweets, eval = FALSE}
if (FALSE) {
djt <- NULL
keep_going <- TRUE
while (keep_going) {
max_id <- if (!is.null(djt$status_id)) max(as.numeric(djt$status_id))
djt.this <- rtweet::get_timeline("realdonaldtrump", n = 3200, max_id = max_id)
djt <- bind_rows(djt, djt.this)
keep_going <- min(djt$created_at) > lubridate::ymd_h("2018-11-07 0", tz = "America/New_York")
cat("\nWe have", nrow(djt), "tweets...")
Sys.sleep(15)
}
saveRDS(djt, here::here("_data", "djt-tweets-2018-09--2019-02.rds"))
}
```
```{r load-djt-tweets}
djt <- readRDS(here::here("_data", "djt-tweets-2018-09--2019-02.rds")) %>%
distinct(status_id, .keep_all = TRUE)
```
```{r djt-tweets}
djt_simple <-
djt %>%
filter(!is_retweet) %>%
mutate(
created_at = with_tz(created_at, tzone = "America/New_York"),
time_inc = floor_date(created_at, "5 min")
) %>%
select(created_at, time_inc, text, status_id)
djt_joined_all <-
exec_time %>%
unnest() %>%
filter(time_end != time_inc) %>%
full_join(djt_simple, by = "time_inc") %>%
select(
event_id, starts_with("time_"), created_at,
listed_title, top_category, text
) %>%
filter(
!is.na(text)
) %>%
distinct(text, .keep_all = TRUE) %>%
mutate(
hour = in_hours(created_at),
date = floor_date(created_at, "day"),
top_category = factor(top_category, executive_categories),
label_sched = if_else(
wday(created_at, abbr = FALSE, week_start = 1) >= 6,
glue("<br>During: <b>Weekend</b>"),
glue(
"<br>During: <b>{top_category}</b>",
"<br>Sched: {strftime(time_start, '%I:%M')} ",
"to {strftime(time_end, '%I:%M %p')}")
),
label = glue("{str_wrap(text, 50)}",
"<br><i>@realDonaldTrump ",
"{strftime(created_at, '%a, %b %e, %I:%M %p')}</i>",
"{label_sched}")
) %>%
select(-time_start, -time_end, -label_sched) %>%
arrange(created_at)
djt_joined <-
djt_joined_all %>%
filter(!is.na(top_category))
```
```{r functions-am-pm}
am_pm <- function(x) {
x <- floor(x)
y <- paste(x)
y[x < 1] <- "12 am"
y[x > 1 & x < 12] <- paste(y[x > 1 & x < 12], "am")
y[x == 12] <- "12 pm"
y[x > 12] <- paste(x[x > 12] - 12, "pm")
y
}
```
```{r functions-rev-date}
# Reverse Time Axis
# thanks: https://stackoverflow.com/a/43626186
c_trans <- function(a, b, breaks = b$breaks, format = b$format) {
library(scales)
a <- as.trans(a)
b <- as.trans(b)
name <- paste(a$name, b$name, sep = "-")
trans <- function(x) a$trans(b$trans(x))
inv <- function(x) b$inverse(a$inverse(x))
trans_new(name, trans, inverse = inv, breaks = breaks, format=format)
}
rev_date <- c_trans("reverse", "time")
```
```{r functions-lightbox-img}
lightbox_img <- function(url, alt = "", caption = "", preview = TRUE) {
if (preview) {
glue::glue(
'<a href="{url}" data-featherlight="image">
<div class="figure">
<img src="{url}" alt="{alt}">
<p class="caption">{caption}</p>
</div>
</a>
'
)
} else {
if (alt == "") alt <- "static image of the plot"
glue::glue('<a href="{url}" data-featherlight="image">{alt}</a>')
}
}
```
```{r plot-theme-fonts}
sysfonts::font_add_google("PT Sans")
sysfonts::font_add_google("PT Sans Narrow")
sysfonts::font_add_google("PT Mono")
showtext::showtext_auto()
```
```{r plot-theme}
theme_set(
hrbrthemes::theme_ipsum(
base_family = "PT Sans",
base_size = 20,
axis_title_family = "PT Sans Narrow",
axis_title_size = 14,
axis_text_size = 12,
axis_title_just = "c"
) +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
plot.margin = margin(15, 15, 15, 15),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
legend.position = "bottom",
legend.key = element_rect(
fill = "white", color = "white", size = 4),
legend.key.width = unit(2, "cm"),
legend.text = element_text(size = 9),
plot.caption = element_text(
color = "#85919b", hjust = 0, size = 12, face = "plain")
)
)
```
```{r plot-vars}
plot_date_breaks <- seq(
from = ymd_h("2018-11-12 0", tz = "America/New_York"),
to = max(exec_time$time_start),
by = "7 day")
event_type_colors <- c(
"Executive Time" = "#445566",
"Travel" = "#997788",
"Lunch" = "#7c9393",
"Meeting" = "#b7c6d6",
"Event" = "#ddbbaa",
"Unknown" = "#d9dde0")
event_type_labels <- sub(" ", "\n", names(event_type_colors))
```
```{r plot-credit-caption}
credit_caption <- function(rtweet = FALSE) {
paste0(
"\nData: Based on White House schedules released by Axios. http://bit.ly/2UGM0fw",
if (rtweet) "\n Tweets collected with {rtweet}. https://rtweet.info",
"\nChart: @grrrck"
)
}
```
On February 3rd, Axios [released President Trump's daily schedule][axios-article].
As in many other areas of his political career, Trump has broken with tradition by hiding his schedule from public view.
In addition to a set of re-typed PDF files, Axios also created a [Google Spreadsheet][axios-spreadsheet] containing the president's schedule and notes about the activities.
If you're interesting in reading about how that task could be accomplished, I highly recommend Maëlle Salmon's post on [rectangling the tables in the PDF files][maelle-rectangle].
The leak and subsequent release by Axios provide unique insight into Trump's daily activities, which are dominated by a large block of time referred to as **Executive Time**.
Reportedly, Trump hated following a strict daily schedule, so former chief-of-staff John Kelly introduced the concept of Executive Time: unstructured time when the president ~~reads~~ watches news, makes phone calls, and writes ~~emails~~ tweets.
I won't comment extensively on what these schedules mean---for more on that angle, see reporting from [Axios][axios-comparison], [Vox][vox-exec-time], [Politico][politico-exec-time] and others.
Instead, I'll use this post to visualize the president's work day and tweeting habits and a demonstrate how to use R, [plot.ly] and the tools of the [tidyverse] to create interactive and static visualizations to try to make sense of what the president does on a daily basis.
## The President's Daily 8am to 5pm Schedule
Axios' [article on Trump's private schedule][axios-article] includes an interactive view of the president's workday schedule from 8 a.m. to 5 p.m.
Here, I recreate the same visualization, with each activity colored according to the activity's category.
(Note these plots look best on desktop devices.)
Hover over any time slot to view more details about the activity at that time.
You can also toggle activity categories --- try removing everything except **Meeting**s, **Lunch**es, and **Event**s, it's unbelievable.
```{r plot-daily-schedule, fig.show = "show", out.width="115%"}
g <-
exec_time %>%
mutate(date = floor_date(time_start, "day")) %>%
filter_workday() %>%
mutate_at(vars(time_start, time_end), in_hours) %>%
ggplot() +
geom_rect(
aes(xmin = time_start,
xmax = time_end,
ymin = date - 3600 * 12,
ymax = date + 3600 * 12,
fill = top_category)
) +
scale_x_continuous(
breaks = seq(8, 17, 3),
limits = c(8, 17),
position = "top",
labels = am_pm(seq(8, 17, 3)),
expand = expand_scale(c(0.025, 0), 0)
) +
scale_y_continuous(
trans = rev_date,
breaks = plot_date_breaks,
labels = strftime(plot_date_breaks, "%b %e"),
expand = expand_scale(c(0.025, 0), 0)
) +
scale_fill_manual(
values = event_type_colors,
labels = event_type_labels
) +
labs(x = "Hour of the Day", y = NULL, fill = NULL) +
ggtitle(
"President Trump's Daily Schedule",
"November 7, 2018 through February 2, 2019"
) +
labs(caption = credit_caption(FALSE)) +
guides(fill = guide_legend(nrow = 1, label.position = "bottom")) +
theme(
axis.text.y = element_text(family = "PT Mono", size = 10),
plot.margin = margin(3, 0, 0, 0, unit = "line")
)
plotly::ggplotly(g + aes(text = label), tooltip = "label") %>%
plotly::layout(xaxis = list(side = "top", title = ""))
```
```{r plot-daily-schedule-static, dependson="plot-daily-schedule"}
g +
theme(plot.margin = margin(0, 15, 0, 15))
```
View `r lightbox_img(knitr::fig_chunk("plot-daily-schedule-static", "png"), preview = FALSE)`.
Expand the section below for a behind-the-scenes look at this visualization.
### How This Was Made... {.js-expandmore}
<div class="js-to_expand">
The pipeline for building this visualization is a fairly standard loading and transformation of the source data with `readr` and `dplyr`, followed by building the visualization in `ggplot2` and passing off to `plotly` for the interactive parts.
On the other hand, I created a number of helper functions and constants that I reused throughout this post, so there's quite a bit of code and preamble to get to the actual plot making.
```r
library(tidyverse)
library(plotly)
library(lubridate)
library(glue)
library(hrbrthemes)
library(showtext)
library(sysfonts)
```
#### Load the Axios Data
```{r view-load-axios, eval=FALSE, echo=TRUE}
<<function-in_hours>>
<<function-executive_categories>>
<<load-exec-time>>
glimpse(exec_time)
```
```{r view-exec-time}
glimpse(exec_time)
```
#### Prepare to Make a Plot
To build the plot, I first created several helper functions for the plot labels, scales, and data filtering.
I also set the global plot theme, and created a few constants that I used across several plots in this post.
##### Helper Functions {.js-expandmore}
<div class="js-to_expand">
The first helper adds `"am"` or `"pm"` to an integer hour for easy-to-read labels on the x-axis _time of day_.
```{r view-am-pm, echo = TRUE}
<<functions-am-pm>>
am_pm(seq(8, 17, 3))
```
The second helper function is copied directly from [StackOverflow: Reverse datetime (POSIXct data) axis in ggplot][so-rev-date], in one of the rare-but-beautiful moments when directly copying and pasting from SO works out perfectly.
This gives me a `rev_date()` transformer function that can be passed to `scale_y_continuous(trans = rev_date)` to show the y-axis in chronological order with the earliest date starting at the top.
A third helper function abstracts the code required to filter out the portion of the data set that belongs to the workday, which I use throughout this post.
This function is a nice example of how [tidyeval] can be used for flexible dplyr wrapper functions.
```{r view-filter-workday, echo=TRUE, eval=FALSE}
<<function-filter_workday>>
```
Finally, another helper function creates the plot caption credits.
```{r view-caption-credit, echo=TRUE, eval=FALSE}
<<plot-credit-caption>>
```
</div>
##### Plot Themes {.js-expandmore}
<div class="js-to_expand">
I used `showtext` and `sysfonts` to match the fonts in the plot to the font used on this blog (PT Mono).
```{r view-plot-fonts, eval=FALSE, echo=TRUE}
<<plot-theme-fonts>>
```
And I used [hrbrthemes'][hrbrthemes] excellent `theme_ipsum()` as my theme's starting point.
```{r view-plot-theme, eval=FALSE, echo=TRUE}
<<plot-theme>>
```
</div>
##### Plot Constants {.js-expandmore}
<div class="js-to_expand">
I created a few constants for later reference: one stores date breaks for the time period covered by the Axios schedule, with labels on each Monday; and the other two store the color palette and labels for the executive activity categories.
The colors were hand-selected from a picture of the Donald (I was expecting there to be more orange).
```{r view-plot-constants, echo=TRUE, eval=FALSE}
<<plot-vars>>
```
```{r view-plot-colors, fig.width=4, fig.height=3}
scales::show_col(event_type_colors)
```
<div style="width: 100%">
<img src="`r knitr::fig_chunk("view-plot-colors", "png")`" style="max-width: 300px; margin: auto;" />
</div>
</div>
#### Build the Actual Plot Already!
Finally, I pulled the schedule data and all of the above pieces together to build the interactive plot.
```{r view-plot-daily-schedule, eval=FALSE, echo=TRUE}
<<plot-daily-schedule>>
```
And that's it!
[Jump back up](#the-presidents-daily-8am-to-5pm-schedule) to see the final product.
</div>
## The President's Daily Tweeting Schedule
```{r djt-workday}
djt_axios_sched <-
djt_joined_all %>%
filter(date >= "2018-11-07", date <= "2019-02-02") %>%
mutate(
weekend = wday(created_at, abbr = FALSE, week_start = 1) >= 6,
weekend = if_else(weekend, "weekend", "weekday")
) %>%
select(-text)
djt_workday <-
djt_axios_sched %>%
filter_workday(
start_var = created_at, end_var = created_at
)
```
```{r djt-workday-for-text}
djt_per_workday <-
djt_axios_sched %>%
filter_workday(
start_var = created_at, end_var = created_at
) %>%
group_by(weekend) %>%
count(date) %>%
summarize(
tweets = sum(n),
n = n(),
mean = tweets/n) %>%
split(.$weekend)
djt_per_day <-
djt_axios_sched %>%
group_by(weekend) %>%
count(date) %>%
summarize(
tweets = sum(n),
n = n(),
mean = tweets/n) %>%
split(.$weekend)
```
Donald Trump tweets about
`r round(djt_per_workday$weekday$mean, 1)`
tweets per working day within working hours.
Why does this number feel so low?
Then again, this represents
`r djt_per_workday$weekday$tweets`
tweets published over
`r djt_per_workday$weekday$n`
workdays and
`r djt_per_workday$weekend$tweets`
tweets over the
`r djt_per_workday$weekend$n`
weekend days in the same time period.
Outside of work hours, the average rises to
`r round(djt_per_day$weekday$mean, 1)`
tweets per 24-hour workday,
or a total of
`r djt_per_day$weekday$tweets`
tweets on workdays and
`r djt_per_day$weekend$tweets`
tweets on weekends.
Below, each tweet sent by the President is shown as a dot over his private schedule.
Hover over a tweet's dot to read the text of the tweet.
```{r plot-tweets-over-category, fig.show = "show", out.width="115%"}
# Modify geom_rect (only layer) to reduce transparency
g_subdued <- g
g_subdued$layers[[1]]$aes_params$alpha <- 0.6
# Add tweets to the plot
g_subdued <-
g_subdued +
ggtitle("President Trump's Daily Tweeting") +
geom_point(
data = djt_workday,
aes(x = hour, y = date + 3600, text = label),
color = "#2c3741",
size = 0.8
)
gpltly <-
plotly::ggplotly(g_subdued, tooltip = "label") %>%
plotly::layout(xaxis = list(side = "top", title = ""))
# remove hover labels for time category layers (6 categories)
# thanks: https://stackoverflow.com/a/45802923/2022615
for (i in 1:6) {
gpltly$x$data[[i]]$hoverinfo <- "none"
}
# Final Plot!
gpltly
```
```{r plot-tweets-over-category-static}
g_subdued +
theme(plot.margin = margin(0, 15, 0, 15))
```
View `r lightbox_img(knitr::fig_chunk("plot-tweets-over-category-static", "png"), preview = FALSE)`.
Expand the section below to learn more about how I gathered tweets from &commat;realDonaldTrump, merged his tweets with the Axios schedules, and added them to the first plot.
Also, if you're interested in exploring the timeline of tweets rendered as they appear on Twitter, Jonathan Sidi created an awesome Shiny app for [exploring Trump's tweets by category][yonicd-app].
### How This Was Made... {.js-expandmore}
<div class="js-to_expand">
This visualization required the President's tweets, which I downloaded using the excellent [rtweet] package.
After matching the tweets with their corresponding activity, I modified the previous plot to soften the coloring of the presidential activities and added the tweets to the chart.
#### Download the President's Tweets
Using the [rtweet] package makes downloading the tweets relatively straightforward.
I only needed to add a loop to gather tweets beyond Twitter's timeline API limits and ensure that I have all the tweets from the time period described in the leaked schedules.
```{r view-get-djt-tweets, echo=TRUE, eval=FALSE}
# Change to `if (TRUE)` to run
<<get-djt-tweets>>
```
#### Join Tweets with the President's Schedule
To align the tweets with the President's schedule, I rounded (actually, floored) the time stamp of each tweet down to the nearest 5 minute interval.
Then, I joined the tweets to the schedule using the 5 minute intervals I created while importing the schedule (see above).
This gives me the category and scheduled activity of each tweet.
```{r view-djt-tweets, eval=FALSE, echo=TRUE}
<<djt-tweets>>
```
Finally, I still need to do a little bit of processing to get these tweets to fit into the previous plot.
```r
djt_workday <-
djt_joined_all %>%
filter_workday(
start_var = created_at, end_var = created_at
) %>%
filter(date >= "2018-11-07", date <= "2019-02-02") %>%
select(-text)
```
At this point, there are a few versions of the tweets data set that I can use in different places.
The `djt_simple` is a basic, bare-bones tibble of tweets.
```{r view-djt-simple}
djt_simple %>%
arrange(created_at) %>%
filter(created_at >= "2018-11-07")
```
The `djt_joined_all` variable holds the complete set of all tweets joined with the full schedule, meaning that there will be missing values where no tweets occurred in a 5 minute window or where the schedule data doesn't cover a tweet.
```{r view-djt-joined-all}
set.seed(4242)
top_10 <- sample(1:nrow(djt_joined_all), 10)
header_rows <- c(
sort(top_10),
setdiff(1:nrow(djt_joined_all), top_10)
)
djt_joined_all %>%
slice(header_rows) %>%
select(time_inc, top_category, text, everything())
```
And `djt_workday` contains the tweets within the period covered by the Axios schedules and during workday hours.
```{r view-djt-workday}
djt_workday %>%
select(time_inc, top_category, label, everything())
```
#### Add Tweets to the Schedule Plot
To build the second plot, I had to manually tweak the first plot to adjust the transparency of the `geom_rect` layer and then overlay the tweets as points.
```r
`r paste(knitr:::knit_code$get("plot-tweets-over-category")[1:14], collapse = "\n")`
```
At this point, the plot is almost ready to go, except for the fact that the tooltip text will appear for the underlying activity layers.
Fortunately, once again [StackOverflow][so-plotly] comes to the rescue.
To disable the tooltip, I have to save the plotly object and change the `$hoverinfo` value to `"none"` for each of the data layers of the activity categories.
```r
`r paste(knitr:::knit_code$get("plot-tweets-over-category")[16:25], collapse = "\n")`
```
Head [back to the visualization](#the-presidents-daily-tweeting-schedule) to view the final product and explore Trump's ~~delusional ranting~~ tweets.
</div>
## How much time is spent in Executive Time?
Looking at the above plots, it's really striking how much time is unstructured _Executive Time_ in Trump's schedule.
But how much of the day is spent in each activity group?
##### R code... {#code-time-summary .js-expandmore .expand-for-code}
<div class="js-to_expand">
The first step is to calculate the total time as a percentage of the 8am to 5pm workday spent in each time category.
This data frame will be used for several plots.
One tricky point is that there are piece of the schedule that are explicitly marked as "Unknown" (or "no data") in the Axios data, so I calculate the total percent of time spent in other categories and subtract this value from 1 to recover the complete unaccounted-for time.
```{r exec-time-total-pct, echo = TRUE}
exec_time_total <-
exec_time %>%
filter(between(hour(time_start), 8, 17), hour(time_start) < 17) %>%
mutate(
date = floor_date(time_start, "day"),
n = difftime(time_end, time_start, units = "mins"),
n = as.numeric(n)
) %>%
group_by(date, top_category) %>%
summarize(pct = sum(n) / (60 * 9)) %>%
ungroup() %>%
# Get unaccounted time for each date (unknown or unlabelled)
nest(-date) %>%
mutate(
total = map_dbl(data, ~ {
filter(., top_category != "Unknown") %>%
pull(pct) %>%
sum()
}),
unaccounted = 1 - total,
) %>%
unnest() %>%
spread(top_category, pct, fill = 0) %>%
mutate(`Unknown` = unaccounted) %>%
select(-total, -unaccounted) %>%
gather("top_category", "pct", -date) %>%
mutate(top_category = factor(top_category, rev(names(event_type_colors))))
exec_time_total %>%
arrange(date, desc(pct))
```
The above tibble contains a summary of time use by day, but the first plot requires a full summary of all days in the schedule.
The following code chunk caculates total hours spent in each group and creates a text label that is used to label the regions of the stacked bar in the plot.
```{r exec-time-total-hours, echo=TRUE}
exec_time_hours <-
exec_time_total %>%
# Only the days covered by the schedule
filter(!(pct == 1 & top_category == "Unknown")) %>%
group_by(top_category) %>%
summarize(hours = sum(pct * 60 * 90)) %>%
arrange(desc(top_category)) %>%
mutate(
pct = hours / sum(hours),
pct_upto = cumsum(pct),
label = glue("{top_category}\n",
"{scales::percent(pct, accuracy = 1)}"),
label = if_else(top_category == "Unknown", "", paste(label))
)
exec_time_hours
```
Finally, I create the plot using `geom_col()` to create a stacked bar chart, that I then rotate to be horizontal with `coord_flip()`.
The bar labels are added as a text annotation, and I do a little adjustment to make sure the annotations fit in the plot and to hide the axis that aren't relevant.
```{r plot-time-category-total-pct, fig.height=4, echo = TRUE}
ggplot(exec_time_hours) +
aes(x = 1,
y = pct,
fill = top_category) +
geom_col() +
geom_text(
aes(x = 0.35, y = pct_upto - pct/2, label = label),
color = "grey30",
family = "PT Sans"
) +
scale_fill_manual(
values = event_type_colors,
labels = rev(event_type_labels),
guide = FALSE
) +
scale_x_continuous(
expand = expand_scale(0, c(0.2, 0))
) +
scale_y_continuous(
labels = scales::percent_format(accuracy = 10),
expand = expand_scale(0, 0),
position = "bottom"
) +
coord_flip() +
labs(
x = NULL,
y = "Percent of Workday Between 8am and 5pm",
fill = NULL
) +
ggtitle(
"What Does President Trump Do With His Time?"
) +
labs(caption = credit_caption(FALSE)) +
theme(
axis.text.y = element_blank(),
panel.grid.major = element_blank(),
axis.ticks.x.top = element_line(color = "grey20")
)
```
</div>
`r lightbox_img(knitr::fig_chunk("plot-time-category-total-pct", "png"))`
```{r exec-time-summary}
untracked_days <-
exec_time_total %>%
filter(
top_category == "Unknown",
pct ==1
) %>%
pull(date)
exec_time_summary <-
exec_time_total %>%
complete(
date = seq(min(.$date), max(.$date), by = "DSTday"),
top_category
) %>%
filter(
top_category == "Executive Time",
!date %in% untracked_days
) %>%
replace_na(list(pct = 0)) %>%
mutate(weekday = wday(date, abbr = FALSE, week_start = 1) < 6)
exec_time_n_weekdays <-
exec_time_summary %>%
filter(weekday) %>%
nrow()
exec_time_gt_half <-
exec_time_summary %>%
filter(weekday, pct >= 0.5) %>%
nrow()
```
For
**`r exec_time_gt_half`**
of the
**`r exec_time_n_weekdays`**
workdays
(that's **`r scales::percent(exec_time_gt_half/exec_time_n_weekdays)`**)
covered by the Axios schedules and for which there is schedule information,
Trump spent 50% _or more_ of his day in executive time.
In other words, there were only
**`r exec_time_n_weekdays - exec_time_gt_half`**
days in about
**`r round(exec_time_n_weekdays/5, 0)`**
work weeks where executive time was not the dominant activity.
When the above time-use summary is expanded into his daily schedule, it's clear how unusual it is for Trump to spend a significant portion of his day in structured events.
##### R code... {#code-time-category-pct-daily .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r plot-time-category-pct-daily, echo=TRUE}
ggplot(exec_time_total) +
aes(date + 3600*12, pct, fill = top_category) +
geom_col(width = 3600*24) +
scale_fill_manual(
values = event_type_colors,
labels = rev(event_type_labels)
) +
scale_y_continuous(
breaks = seq(0, 1, .25),
labels = scales::percent_format(accuracy = 25),
position = "bottom",
expand = expand_scale(c(0.025, 0), 0)
) +
scale_x_continuous(
trans = rev_date,
breaks = plot_date_breaks,
labels = strftime(plot_date_breaks, "%b %e"),
expand = expand_scale(c(0.025, 0), 0)
) +
coord_flip() +
labs(
x = NULL,
y = "Percent of Workday Between 8am and 5pm",
fill = NULL
) +
guides(
fill = guide_legend(nrow = 1,
reverse = TRUE,
label.position = "bottom")
) +
ggtitle(
"What Does President Trump Do With His Time?"
) +
labs(caption = credit_caption(FALSE))
```
</div>
`r lightbox_img(knitr::fig_chunk("plot-time-category-pct-daily", "png"))`
In fact, the largest non-executive time block for the
`r exec_time_n_weekdays - exec_time_gt_half`
days where executive time isn't more than half of Trump's workday are almost entirely travel related.
##### R code... {#code-table-not-full-on-exec-time-day .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r table-not-full-on-exec-time-day, results="asis", echo=TRUE, eval=FALSE}
exec_time_summary %>%
filter(pct < 0.5, weekday) %>%
select(date) %>%
left_join(exec_time %>% mutate(date = floor_date(time_start, "day"))) %>%
mutate(duration = difftime(time_end, time_start, unit = "hours")) %>%
select(date, duration, listed_title) %>%
arrange(date, desc(duration)) %>%
filter(listed_title != "Executive time") %>%
group_by(date) %>%
slice(1) %>%
mutate(duration = round(duration, 2)) %>%
knitr::kable(col.names = c(
"Date", "Duration", "Longest Non-Executive-Time Activity"
), format = "html") %>%
kableExtra::column_spec(1:2, width = "6.5em")
```
</div>
```{r table-not-full-on-exec-time-day-out, results="asis"}
<<table-not-full-on-exec-time-day>>
```
Travel seems to be the only activity capable of substantially affecting the amount of time the president spends on his executive time.
My (completely speculative) guess is that this is in part due to travel being the only activity with a duration long enough to displace executive time, and also in part that travel probably most resembles executive time.
##### R code... {#code-plot-exec-time-vs-others .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r plot-exec-time-vs-others, fig.width=8, fig.height=3, echo=TRUE}
exec_time_total %>%
# Drop "Unkown" time category, not that important
filter(top_category != "Unknown") %>%
# Spread top_category...
spread(top_category, pct) %>%
# ...and gather to leave Executive Time in own column
gather(other_activity, pct, -date, -`Executive Time`) %>%
# Ignore points where both groups are 0% (not informative)
filter(pct + `Executive Time` > 0) %>%
# Pipe into ggplot
ggplot() +
aes(`Executive Time`, pct, color = other_activity) +
geom_point() +
facet_wrap(~ other_activity, nrow = 1) +
scale_x_continuous(
breaks = c(0, 0.5, 1),
labels = scales::percent_format(accuracy = 25),
limits = c(0, 1)
) +
scale_y_continuous(
breaks = c(0, 0.5, 1),
labels = scales::percent_format(accuracy = 25),
limits = c(0, 1)
) +
scale_color_manual(
values = event_type_colors,
labels = rev(event_type_labels),
guide = FALSE
) +
coord_flip() +
labs(
x = "Percent of Workday\nIn Executive Time",
y = "Percent of Workday Spent in Activity",
caption = credit_caption()
) +
theme(
axis.title.x = element_text(margin = margin(10)),
axis.title.y = element_text(margin = margin(r = 20)),
)
```
</div>
`r lightbox_img(knitr::fig_chunk("plot-exec-time-vs-others", "png"))`
## Tweeter In Chief
At the point, I was very interested in exploring how Trump's tweeting relates to his work schedule.
The first question to answer is _When does he send most of his tweets?_
And the answer is: primarily on the weekends, during executive time, or before or after work hours.
##### R code... {#code-plot-tweet-counts-by-activity .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r plot-tweet-counts-by-activity, fig.height = 5, echo=TRUE}
# Start and end dates of Axios-pubslished schedules
# which I called `exec_time` for some reason and am sticking with
exec_time_boundaries <-
exec_time %>%
summarize(min = min(time_start), max = max(time_end))
exec_time %>%
# mutate(event_id = row_number()) %>%
unnest() %>%
filter(time_end != time_inc) %>%
full_join(djt_simple, by = "time_inc") %>%
select(event_id, time_inc, created_at, listed_title, top_category, text) %>%
filter(
!is.na(text),
between(time_inc, exec_time_boundaries$min, exec_time_boundaries$max)
) %>%
mutate(
wday = wday(created_at, abbr = FALSE, week_start = 1),
top_category = case_when(
!is.na(top_category) ~ paste(top_category),
wday > 5 ~ "Weekend",
between(wday, 1, 5) & hour(created_at) < 6 ~ "Early Morning (before 6am)",
between(wday, 1, 5) & hour(created_at) < 8 ~ "Morning (6-8 am)",
between(wday, 1, 5) & hour(created_at) > 17 ~ "Evening (after 5pm)",
is.na(top_category) ~ "Unknown",
TRUE ~ paste(top_category))
) %>%
count(top_category) %>%
arrange(n) %>%
mutate(top_category = fct_inorder(top_category)) %>%
ggplot() +
aes(top_category, n) +
geom_col(fill = "#445566") +
scale_y_continuous(expand = c(0, 0, 0, 5)) +
coord_flip() +
theme(panel.grid.major.y = element_blank()) +
labs(x = "Activity or Time of Day",
y = paste(
"Total Tweets Sent Between",
strftime(exec_time_boundaries$min, "%F"),
"to",
strftime(exec_time_boundaries$max, "%F")
),
title = "Trump Tweet Volume by Scheduled Activity",
caption = credit_caption(rtweet = TRUE))
```
</div>
`r lightbox_img(knitr::fig_chunk("plot-tweet-counts-by-activity", "png"))`
We can get a sense of the timing of Trump's tweeting activities by looking at the time of day of each tweet and the scheduled activity that's going on at the time.
The following plot shows each tweet as a vertical line and considers only workday tweeting and only for days covered by the Axios schedules.
##### R code... {#code-djt-tweets-timeline .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r djt-tweets-with-non-work, echo=TRUE}
event_type_colors_extra <- c(event_type_colors, "Non-Work Hours" = "#828486")
djt_joined_work_non_work <-
djt_joined_all %>%
mutate_at(vars(top_category), as.character) %>%
mutate(weekend = wday(created_at, abbr = FALSE, week_start = 1) > 5) %>%
filter(
!weekend,
between(created_at, exec_time_boundaries$min, exec_time_boundaries$max)
) %>%
mutate(top_category = if_else(
between(in_hours(created_at), 8, 17) & is.na(top_category),
"Unknown",
top_category
)) %>%
replace_na(list(top_category = "Non-Work Hours")) %>%
mutate(
top_category = fct_infreq(top_category)
)
```
```{r djt-tweets-timeline, fig.height=6, echo=TRUE}
ggplot(djt_joined_work_non_work) +
aes(x = hour, y = 1, color = top_category) +
geom_segment(aes(xend = hour, yend = 0), alpha = 0.6, size = 0.5) +
facet_wrap(~ top_category, ncol = 1, strip.position = "left") +
scale_x_continuous(
position = "bottom",
breaks = seq(0, 24, 4),
limits = c(0, 24),
labels = am_pm(seq(0, 24, 4)),
expand = expand_scale(c(0.025, 0), 0)
) +
scale_color_manual(
values = event_type_colors_extra,
labels = names(event_type_colors_extra)
) +
scale_fill_manual(
values = event_type_colors_extra,
labels = names(event_type_colors_extra)
) +
coord_cartesian(clip = "off") +
labs(x = NULL, y = NULL, color = NULL) +
guides(color = FALSE, fill = FALSE) +
theme(
panel.grid.major.y = element_blank(),
axis.text.y = element_blank(),
strip.text.y = element_text(angle = 180,
margin = margin(r = 5, l = 25),
hjust = 1),
panel.spacing.y = unit(0, "pt")
) +
ggtitle(
"What's On His Schedule When He's Tweeting?",
"Each line represents a tweet, colored by the activity on his White House Schedule"
) +
labs(caption = credit_caption(rtweet = TRUE))
```
</div>
`r lightbox_img(knitr::fig_chunk("djt-tweets-timeline", "png"))`
Most of Trump's tweeting happens betewen 7 and 9 am, but what's striking is that it's nearly impossible to tell the difference between early morning tweeting and the start of President Trump's official workday at 8am.
##### R code... {#code-plot-tweets-work-non-work-same-timeline .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r plot-tweets-work-non-work-same-timeline, fig.height=7, echo=TRUE}
djt_joined_work_non_work %>%
filter(top_category %in% c("Non-Work Hours", "Executive Time")) %>%
mutate(week = floor_date(created_at, "week"),
week = strftime(week, "%F")) %>%
ggplot() +
aes(x = hour, y = 1, color = top_category) +
geom_segment(aes(xend = hour, yend = 0)) +
facet_wrap(~ week, ncol = 1, strip.position = "left") +
scale_x_continuous(
position = "bottom",
breaks = seq(0, 24, 2),
limits = c(4, 12),
labels = am_pm(seq(0, 24, 2)),
expand = expand_scale(c(0.025, 0), 0)
) +
scale_color_manual(
values = event_type_colors_extra,
labels = names(event_type_colors_extra)
) +
scale_fill_manual(
values = event_type_colors_extra,
labels = names(event_type_colors_extra)
) +
labs(x = NULL, y = NULL, color = NULL, caption = credit_caption(TRUE)) +
guides(color = FALSE, fill = FALSE) +
theme(
panel.grid.major.y = element_blank(),
axis.text.y = element_blank(),
strip.text.y = element_text(angle = 180, margin = margin(r = 25)),
panel.spacing.y = unit(0, "pt")
) +
ggtitle(
"When Do Official Work Hours Start?",
"Morning tweets published over one week periods.\nAccording to the White House Schedule, \"Executive Time\" starts at 8am in the Oval Office."
)
```
</div>
`r lightbox_img(knitr::fig_chunk("plot-tweets-work-non-work-same-timeline", "png"))`
As we learned above, Trump sends about
`r round(djt_per_workday$weekday$mean, 0)`
tweets per working day within working hours.
Naturally, I wondered if he tends to tweet more or less during the day when he has more executive or travel time available.
Similarly does he tweet less when he has more strucured time, i.e. metings, events, or lunches?
Somewhat unsurprisingly, the number of tweets sent during the workday in only very slightly correlated with the amount of unstructured time on Trump's calendar.
This makes sense: there is very little variation in the amount of the day spent in structured events -- it's never more than half the day.
##### R code... {#code-tweets-by-exec-time .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r tweets-by-exec-time, fig.height=5, echo=TRUE}
djt_joined %>%
group_by(date) %>%
count() %>%
rename(tweets = n) %>%
left_join(exec_time_total, ., by = "date") %>%
replace_na(list(tweets = 0)) %>%
filter(top_category %in% c("Executive Time", "Unknown", "Travel", "Lunch")) %>%
spread(top_category, pct, fill = 0) %>%
filter(!Unknown == 1) %>%
mutate(pct = Unknown + `Executive Time` + Travel + Lunch) %>%
ggplot() +
aes(pct, tweets) +
geom_smooth(
method = "lm",
color = event_type_colors["Executive Time"],
fill = event_type_colors["Unknown"]
) +
geom_point(color = event_type_colors["Executive Time"]) +
scale_x_continuous(labels = scales::percent_format(10)) +
labs(
x = "Percent of Workday Dedicated to Downtime\n(Executive Time, Travel, Unknown)",
y = "Number of Tweets",
title = "Does Trump Tweet More When He Has More Downtime?",
caption = credit_caption(rtweet = TRUE))
```
</div>
`r lightbox_img(knitr::fig_chunk("tweets-by-exec-time", "png"))`
##### R code... {#code-tweets-by-not-meeting-event .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r tweets-by-not-meeting-event, fig.height=5, echo=TRUE}
djt_simple %>%
mutate(date = floor_date(created_at, "day")) %>%
group_by(date) %>%
count() %>%
rename(tweets = n) %>%
left_join(exec_time_total, ., by = "date") %>%
filter(top_category %in% c("Meeting", "Event", "Lunch")) %>%
group_by(date) %>%
summarize(pct = sum(pct), tweets = max(tweets)) %>%
filter(pct > 0) %>%
ggplot() +
aes(pct, tweets) +
geom_smooth(
method = "lm",
color = event_type_colors["Executive Time"],
fill = event_type_colors["Unknown"]
) +
geom_point(color = event_type_colors["Executive Time"]) +
scale_x_continuous(labels = scales::percent_format(10)) +
labs(x = "Percent of Workday Dedicated to Meetings, Lunches, or Events",
y = "Number of Tweets",
title = "Does Trump Tweet Less When He Does More \"Work\"?",
caption = credit_caption(rtweet = TRUE))
```
</div>
`r lightbox_img(knitr::fig_chunk("tweets-by-not-meeting-event", "png"))`
Finally, I wanted to explore the emotional valence of Trump's day-time tweeting.
Are his morning tweets angrier or more rant-driven?
Are his event-related tweets more positive?
To this end, I ran Trump's tweet text through the NRC sentiment dictionary using `get_nrc_sentiment()` from the `syuzhet` package.
This function returns an integer score from 0 to 10 for a range of positive and negative emotions.
##### R code... {#code-tweet-emotion-by-event .js-expandmore .expand-for-code}
<div class="js-to_expand">
```{r djt-tweet-sentiment, echo=TRUE}
djt_sentiment <-
djt_joined %>%
select(top_category, text) %>%
mutate(sentiment = map(text, syuzhet::get_nrc_sentiment)) %>%
unnest() %>%
gather(emotion, value, -top_category, -text)
djt_sentiment
djt_sentiment_mean <-
djt_sentiment %>%
group_by(top_category, emotion) %>%
summarize(value = mean(value)) %>%
mutate(emotion = fct_reorder(emotion, value)) %>%
filter(top_category != "Unknown")
djt_sentiment_mean
```
```{r tweet-emotion-by-event, echo=TRUE, fig.height=7}
emotions <- c(
"positive", "joy", "trust", "surprise", "anticipation",
"sadness", "anger", "fear", "disgust", "negative"
)
djt_sentiment %>%
mutate(emotion = factor(emotion, rev(emotions))) %>%
ggplot() +
aes(y = value, x = emotion, fill = top_category) +
# ggridges::geom_density_ridges() +
geom_boxplot(alpha = 0.7, color = "grey20", outlier.shape = NA) +
scale_fill_manual(values = event_type_colors) +
guides(fill = FALSE, color = FALSE) +
coord_flip() +
facet_wrap(~top_category, scales = "free_y") +
labs(y = "Sentiment Score", x = NULL,
title = "Emotions Expressed in Trump Tweets",
caption = credit_caption(rtweet = TRUE))
```
</div>
`r lightbox_img(knitr::fig_chunk("tweet-emotion-by-event", "png"))`
The result provides something of a profile of Trump's tweeting habits, but more analysis is needed to make sense of these sentiment values.
I wanted to look further into how these tweets were categorized by the sentiment dictionary, but by this point this post is already far too long and has consumed too much of my evenings and weekends, so I'll save it for another day.
***
Thanks for reading!
I'd love to hear your thoughts or feedback.
I'm [&commat;grrrck](https://twitter.com/grrrck) on Twitter.
You can’t perform that action at this time.