# tt.R
# Author: Dan Snow
# Date: Nov 25, 2017
# NOTE: Results of this script depend on the session timezone, so you will
# almost certainly need to experiment with it. The block below sets both the
# TZ environment variable and the `tz` option to the same zone; remember to
# restore your own settings afterwards.
local({
  script_tz <- "America/Chicago"
  Sys.setenv(TZ = script_tz)
  options(tz = script_tz)
})
# ---Setup---
# Importing the necessary libraries
library(tidyverse)
library(lubridate)
library(jsonlite)
library(scales)
library(viridis)
library(KernSmooth)
library(MASS)
library(glue)
# Importing data from the Trump Twitter Archive GitHub repository.
# `tt.years` may hold a single year or a vector of years; `{y}` in `tt.git`
# is replaced by each year in turn.
tt.years <- 2017
tt.git <- "https://github.com/bpb27/trump_tweet_data_archive/raw/master/condensed_{y}.json.zip"

# Downloading one archive per requested year into the working directory.
# glue() is vectorised over `y`, so it yields one URL per year. walk() is used
# because download.file() is called only for its side effect, and the formula
# makes sure it is invoked once per URL. mode = "wb" requests a binary
# transfer so the zip archives are not altered on platforms that distinguish
# text from binary mode.
glue(tt.git, y = tt.years) %>%
  as.character() %>%
  unique() %>%
  walk(~ download.file(.x, basename(.x), method = "libcurl", mode = "wb"))
# Unzipping files and combining them in a data frame.
# Every zip archive in the working directory that holds at least one JSON
# member is extracted to the session temp directory and parsed. Each row is
# tagged with the four-digit year found in the archive's file name (kept as a
# character column named `year`).
tt.df <- dir(pattern = "\\.zip$", full.names = TRUE) %>%
  keep(~ any(grepl("\\.json$", unzip(.x, list = TRUE)$Name))) %>%
  map_df(function(zip_path) {
    members <- unzip(zip_path, list = TRUE)$Name
    json_members <- members[grepl("\\.json$", members)]
    # Extract only the JSON members; unzip() returns the extracted file paths.
    json_paths <- unzip(zip_path, files = json_members, exdir = tempdir())
    map_df(json_paths, fromJSON) %>%
      mutate(year = str_extract(basename(zip_path), "\\d{4}"))
  })
# Cleaning up: delete the downloaded *.json.zip archives from the working
# directory. file.remove() is vectorised, so no iteration is needed; the
# pattern is an anchored regular expression so only names ending in
# ".json.zip" are removed. invisible() keeps the logical result from being
# printed when the script is run non-interactively.
invisible(file.remove(dir(pattern = "\\.json\\.zip$")))
# ---Tweet Times---
# Converting created_at to POSIXct, deriving the month bucket, and reducing
# each tweet to its Eastern time of day.
#
# The whole timestamp is parsed, including the UTC offset (%z) and the year
# (%Y). Without the year, strptime() falls back to the current year, which
# turns Feb 29 into NA outside leap years and applies the wrong year's
# daylight-saving dates in the timezone conversion below.
# NOTE(review): assumes created_at follows Twitter's
# "Sun Dec 31 23:43:04 +0000 2017" layout -- confirm against the archive.
tt.df$time <- as.POSIXct(
  tt.df$created_at,
  format = "%a %b %d %H:%M:%S %z %Y",
  tz = "UTC"
)

# Month bucket: first day of the tweet's month. The month number is read from
# the UTC timestamp and combined with the `year` column of tt.df.
tt.df$month <- format(tt.df$time, format = "%m")
tt.df$date <- as.POSIXct(
  paste(tt.df$year, tt.df$month, "01", sep = "/"),
  format = "%Y/%m/%d",
  tz = "UTC"
)

# Time of day: render the clock time in America/New_York, then re-parse that
# bare "HH:MM:SS" string. Because no date is given, every tweet lands on the
# current calendar day, labelled as UTC, so all tweets share one time axis.
tt.df$time <- format(tt.df$time, format = "%H:%M:%S", tz = "America/New_York")
tt.df$time <- as.POSIXct(tt.df$time, format = "%H:%M:%S", tz = "UTC")

# Drop rows whose timestamp could not be parsed.
tt.df <- tt.df[!is.na(tt.df$time), ]
# ---Tweet Density---
# 1D density function for entire time period
# tt.density <- function(x) {
# den <- bkde(x = x)
# i <- findInterval(x, den$x)
# return(den$y[i])
# }
# tt.df$density <- tt.density(as.numeric(tt.df$time))
# density function for each month
# tt.df$density <- tt.df %>%
# group_by(date) %>%
# nest() %>%
# { map(.$data, ~ tt.density(as.numeric(.$time))) } %>%
# unlist()
# 2D density function which groups across months.
#
# Fits a two-dimensional kernel density estimate to the points (x, y) with
# kde2d() on an n-by-n grid, then returns, for every input point, the density
# stored in the grid cell that point falls into.
#
# Args:
#   x, y: numeric coordinate vectors, one entry per point.
#   n:    number of grid points per axis, passed on to kde2d().
# Returns:
#   A numeric vector holding one density value per input point.
tt.density <- function(x, y, n = 100) {
  grid <- kde2d(x = x, y = y, n = n)
  # findInterval() gives, per coordinate, the index of the grid breakpoint at
  # or below it; the two-column index matrix then selects one z cell per point.
  cell <- cbind(
    findInterval(x, grid$x),
    findInterval(y, grid$y)
  )
  grid$z[cell]
}
# Store one density value per tweet, estimated jointly over time of day (x)
# and month bucket (y); both POSIXct columns are passed as plain numbers.
tt.df[["density"]] <- tt.density(
  x = as.numeric(tt.df[["time"]]),
  y = as.numeric(tt.df[["date"]])
)
# ---Final ggplot----
# Plot inputs are computed up front so the ggplot call stays declarative.

# tt.df$time places every tweet on one shared calendar day labelled UTC, so
# the time axis is anchored to midnight (UTC) of that day, taken from the data
# itself. This keeps the axis independent of the session timezone.
tt.day_start <- as.POSIXct(format(min(tt.df$time), "%Y-%m-%d"), tz = "UTC")

# Reference lines at 06:00 and 09:00 on the shared day.
tt.fox_times <- tt.day_start + c(6, 9) * 3600

# Aim for roughly nine x-axis breaks across the months covered by the data,
# using a whole number of months that is never less than one.
tt.month_span <- interval(min(tt.df$date), max(tt.df$date)) / months(1)
tt.break_width <- paste(max(1, round(tt.month_span / 9)), "months")

# With more than one year requested, month labels carry the year and the
# title shows the year range; plain if/else is used because the condition is
# a single value and one branch yields a function.
tt.multi_year <- length(tt.years) > 1
tt.month_labels <- if (tt.multi_year) date_format("%b %y") else date_format("%b")
tt.year_label <- if (tt.multi_year) {
  paste(min(tt.years), max(tt.years), sep = " - ")
} else {
  tt.years
}

tt.plot <- ggplot() +
  geom_tile(
    data = tt.df,
    aes(date, time, color = density),
    size = .3) +
  geom_hline(
    aes(yintercept = tt.fox_times, linetype = "Start/Stop"),
    color = "indianred",
    size = 1.2,
    show.legend = TRUE) +
  scale_y_datetime(
    labels = date_format("%H:%M"),
    breaks = date_breaks("2 hour"),
    expand = c(0, 0),
    limits = c(tt.day_start, tt.day_start + 24 * 3600)) +
  scale_x_datetime(
    breaks = date_breaks(tt.break_width),
    labels = tt.month_labels,
    expand = c(0, 0)) +
  labs(
    x = "Month",
    y = "Time",
    title = glue(
      "Trump Tweet Density vs. Fox & Friends Airtime, {y}",
      y = tt.year_label),
    subtitle = "Tweets plotted by month and minute. Collected from trumptwitterarchive.com.",
    color = "Tweet Density") +
  scale_color_viridis(
    breaks = c(
      max(tt.df$density),
      (max(tt.df$density) + min(tt.df$density)) / 2,
      min(tt.df$density)),
    labels = c("High", "Mid", "Low")) +
  scale_linetype_manual(
    name = "Fox & Friends",
    values = c("Start/Stop" = "longdash")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 12, margin = margin(t = 8, unit = "pt")),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12, margin = margin(b = 8, unit = "pt")),
    plot.margin = unit(c(10, 10, 20, 10), "pt"))

# Write the finished figure to the working directory.
ggsave("tt.png", plot = tt.plot)