# 5. Plots

**Some plots to obtain a quick impression of the data.**

## Libraries

In [None]:
library(tidyverse)

## Load final dataframe

In [None]:
# Read the prepared tweet-level data frame from disk and print a compact
# overview of its columns.
adata <- readRDS("result_df.rds")
adata %>% glimpse()

## Descriptives

### Missing tweets

In [None]:
# Basic counts: tweets per account, total number of tweets, and number of
# tweets flagged as missing (missing == 1).
users_n <- adata %>%
  group_by(user_id) %>%
  summarise(n_tweets = n())
sum_tweets <- nrow(adata)
sum_del <- sum(adata$missing == 1)

# Number of tweets per error type of interest; rows where the comparison
# yields NA are not counted.
not_found <- sum(adata$error == "Not Found Error", na.rm = TRUE)
unauth <- sum(adata$error == "Authorization Error", na.rm = TRUE)

# Frequency table over every value of `error`, displayed as cell output.
errors <- adata %>%
  group_by(error) %>%
  summarise(n = n())
errors

# Report each count together with its share of all tweets (in percent).
print(paste0(
  "Sum deleted: ", sum_del, " of ", sum_tweets,
  " (", round(sum_del / sum_tweets * 100, digits = 2), "%)"
))

print(paste0(
  "Sum not found: ", not_found,
  " (", round(not_found / sum_tweets * 100, digits = 2), "%)"
))
print(paste0(
  "Sum unauthorized: ", unauth,
  " (", round(unauth / sum_tweets * 100, digits = 2), "%)"
))

# One row per user_id in `users_n`, so its row count is the number of users.
print(paste("Number of users:", nrow(users_n)))

## Plots

### heavy tweeter

In [None]:
# Tweet count per account, restricted to the 50 accounts with the most tweets.
heavy_tweeter <- adata %>%
  group_by(user_id) %>%
  summarise(total_tweets = n()) %>%
  arrange(desc(total_tweets)) %>%
  head(n = 50)

# Bar chart with accounts ordered from most to fewest tweets; the x labels are
# rotated so the ids stay legible. The title shows how many accounts are drawn.
ht <- ggplot(
  heavy_tweeter,
  aes(x = reorder(user_id, -total_tweets), y = total_tweets)
) +
  geom_col() +
  labs(x = "user_id", title = paste("N=", nrow(heavy_tweeter))) +
  theme(axis.text.x = element_text(angle = 90))

ht

### heavy deleter (absolute)

In [None]:
# Number of missing tweets (missing == 1) per account, restricted to the 50
# accounts with the highest counts.
heavy_del_abs <- adata %>%
  group_by(user_id) %>%
  summarise(deleted_tweets = sum(missing == 1)) %>%
  arrange(desc(deleted_tweets)) %>%
  head(n = 50)

# Bar chart with accounts ordered from most to fewest missing tweets.
ht <- ggplot(
  heavy_del_abs,
  aes(x = reorder(user_id, -deleted_tweets), y = deleted_tweets)
) +
  geom_col() +
  labs(x = "user_id", title = paste("N =", nrow(heavy_del_abs))) +
  theme(axis.text.x = element_text(angle = 90))

ht

### heavy deleter (relative)

In [None]:
# Share of missing tweets (missing == 1) per account, in percent, sorted from
# the highest to the lowest share.
#
# Every account is kept. The pipeline previously ended with an argument-less
# `slice()`: on dplyr < 1.1.0 that call keeps all rows, but from dplyr 1.1.0 on
# it returns zero rows, which would leave the plot empty and the title at
# "N = 0". Dropping the call gives the same all-rows result on every version.
heavy_del_rel <- adata %>%
  group_by(user_id) %>%
  summarise(
    total_tweets = n(),
    deleted_tweets = sum(missing == 1)
  ) %>%
  mutate(
    percent_deleted = round((deleted_tweets / total_tweets) * 100, digits = 2)
  ) %>%
  arrange(desc(percent_deleted))

# Bar chart with accounts ordered from highest to lowest share of missing
# tweets; the title reports how many accounts are drawn.
ht <- ggplot(
  data = heavy_del_rel,
  aes(x = reorder(user_id, -percent_deleted), y = percent_deleted)
) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "user_id", title = paste("N =", nrow(heavy_del_rel)))

ht

## Timeline

### 1. All users

In [None]:
# Daily share of missing tweets across all users.
# `percent_deleted` is the mean of the logical `missing == 1`, i.e. a
# proportion between 0 and 1 rather than a value on the 0-100 scale.
# Rounded to 3 digits so the values have the same resolution as in the
# selective-deleter and selective-keeper timelines this plot is compared with
# (this block previously rounded to 2 digits).
del_timeline <- adata %>%
  mutate(date = lubridate::as_date(createdAt)) %>%
  group_by(date) %>%
  summarise(percent_deleted = round(mean(missing == 1), digits = 3))

# Smoothed trend (red) drawn first, then the raw daily values as a black
# outline; alpha = 0 makes the area fill fully transparent.
del_t <- ggplot(
  data = del_timeline,
  aes(x = date, y = percent_deleted)
) +
  geom_smooth(span = 0.1, colour = "red") +
  geom_area(color = "black", fill = "grey", alpha = 0) +
  labs(title = "All users")

del_t

### 2. Include selective deleters, exclude inactive users
- Selective deleters: percentage of deleted tweets < 50%
- Inactive users: number of tweets <= 10
- Line gets smoother

In [None]:
# Daily share of missing tweets, restricted to rows whose `usage` is not
# "inactive_user" and whose `deletion_behaviour` is "selective_deleter".
# `percent_deleted` is a proportion between 0 and 1 (mean of a logical).
del_timeline <- adata %>%
  dplyr::filter(
    usage != "inactive_user",
    deletion_behaviour == "selective_deleter"
  ) %>%
  mutate(date = lubridate::as_date(createdAt)) %>%
  group_by(date) %>%
  summarise(percent_deleted = round(mean(missing == 1), digits = 3))

# Smoothed trend (red) first, then the raw daily values as a black outline
# with a fully transparent fill.
del_t <- ggplot(del_timeline, aes(x = date, y = percent_deleted)) +
  geom_smooth(span = 0.1, colour = "red") +
  geom_area(color = "black", fill = "grey", alpha = 0) +
  labs(title = "Selective deleters (% deleted tweets < 50%)")

del_t

### 3. Include selective keepers, exclude inactive users
- Selective keepers: percentage of deleted tweets > 50%
- Inactive users: number of tweets <= 10
- Larger peaks

In [None]:
# Daily share of missing tweets, restricted to rows whose `usage` is not
# "inactive_user" and whose `deletion_behaviour` is "selective_keeper".
# Author's note carried over from the original cell: a table of `screen_name`
# for this subset showed only about 45 accounts.
del_timeline <- adata %>%
  dplyr::filter(
    usage != "inactive_user",
    deletion_behaviour == "selective_keeper"
  ) %>%
  mutate(date = lubridate::as_date(createdAt)) %>%
  group_by(date) %>%
  summarise(percent_deleted = round(mean(missing == 1), digits = 3))

# Smoothed trend (red) first, then the raw daily values as a black outline
# with a fully transparent fill.
del_t <- ggplot(del_timeline, aes(x = date, y = percent_deleted)) +
  geom_smooth(span = 0.1, colour = "red") +
  geom_area(color = "black", fill = "grey", alpha = 0) +
  labs(title = "Selective keepers (% deleted tweets > 50%)")

del_t