## Vergleich des Verhaltens von Politikern in Sozialen Medien

In [None]:
library(lubridate)
library(ggplot2)
library(dplyr)
library(readr)
library(magrittr)
library(scales)
library(tidytext)
library(stringr)
library(tidyr)
source("SentiWS_v1.8c/SentiWS.R")

In [None]:
# Load the two exported Twitter timelines, one CSV file per account.
tweets_1.csv <- read_csv(file = "twitter_output/twitter_1.csv")
tweets_2.csv <- read_csv(file = "twitter_output/twitter_2.csv")

In [None]:
# Label every row with the account it was loaded for, then stack both
# tables into a single data frame.
tweets <- bind_rows(
  mutate(tweets_1.csv, collection = "b_riexinger"),
  mutate(tweets_2.csv, collection = "Alice_Weidel")
)

In [None]:
# Preview the first five tweets, restricted to the columns of interest.
tweets %>%
  select(date, username, text) %>%
  head(n = 5)

In [None]:
# Tweet volume over time: one 20-bin histogram panel per account,
# stacked vertically (the legend is redundant with the facet labels).
ggplot(data = tweets, mapping = aes(x = date, fill = collection)) +
  geom_histogram(bins = 20, position = "identity", show.legend = FALSE) +
  facet_wrap(facets = ~ collection, ncol = 1)

In [None]:
# Stop-word list: the first column (V1) of the headerless table file.
stoppwords.de <- read.table(file = "SentiWS_v1.8c/stopp_words_de.csv")[["V1"]]

In [None]:
# Tokenise all non-retweet tweets into one token per row.
#
# Pipeline:
#   1. drop tweets whose text starts with "RT"
#   2. strip the HTML entities matched by `remove_reg` from the text
#      (the pattern was previously defined but never applied, so the
#      entities could end up in the token stream)
#   3. split the text into tokens with the tidytext "tweets" tokenizer
#   4. drop stop words (also in their apostrophe-free spelling), tokens
#      without any lowercase ASCII letter, and tokens of 2 characters or less
#
# NOTE(review): `token = "tweets"` is no longer supported by recent
# tidytext releases -- confirm against the installed version.
remove_reg <- "&amp;|&lt;|&gt;"
tidy_tweets <- tweets %>%
  filter(!str_detect(text, "^RT")) %>%
  mutate(text = str_remove_all(text, remove_reg)) %>%
  unnest_tokens(word, text, token = "tweets") %>%
  filter(!word %in% stoppwords.de,
         !word %in% str_remove_all(stoppwords.de, "'"),
         str_detect(word, "[a-z]"),
         nchar(word) > 2)

head(tidy_tweets, n = 5)

In [None]:
# Hashtag-only variant of the tokenisation: same pipeline as for the
# word tokens, but only tokens containing "#" followed by a lowercase
# ASCII letter are kept.
#
# The HTML entities matched by `remove_reg` are stripped from the text
# before tokenising (the pattern was previously defined but never applied).
#
# NOTE(review): `token = "tweets"` is no longer supported by recent
# tidytext releases -- confirm against the installed version.
remove_reg <- "&amp;|&lt;|&gt;"
tidy_tweets2 <- tweets %>%
  filter(!str_detect(text, "^RT")) %>%
  mutate(text = str_remove_all(text, remove_reg)) %>%
  unnest_tokens(word, text, token = "tweets") %>%
  filter(!word %in% stoppwords.de,
         !word %in% str_remove_all(stoppwords.de, "'"),
         str_detect(word, "#[a-z]"),
         nchar(word) > 2)

head(tidy_tweets2, n = 5)
# From here on the analysis works on the hashtag tokens: this overwrites
# the word-level `tidy_tweets` built earlier.
tidy_tweets <- tidy_tweets2

In [None]:
# Relative frequency of every token within each account:
#   n     = occurrences of the token for that account
#   total = number of token rows for that account
#   freq  = n / total
frequency <- tidy_tweets %>%
  group_by(collection) %>%
  count(word, sort = TRUE) %>%
  # Join on an explicit key: avoids the "Joining, by = ..." message and
  # keeps the join stable if either side ever gains another shared column.
  left_join(tidy_tweets %>%
              group_by(collection) %>%
              summarise(total = n()),
            by = "collection") %>%
  mutate(freq = n / total) %>%
  # Drop the leftover grouping so later verbs operate on the whole table.
  ungroup()

In [None]:
# write.csv(unique(frequency$word),"SentiWS_v1.8c/stopp_words_de.csv", row.names = FALSE)
# stoppwords.de <- read.table("SentiWS_v1.8c/stopp_words_de.csv")

In [None]:
# Reshape to one row per token with one frequency column per account,
# then order the rows by the two frequency columns.
frequency <- spread(
  select(frequency, collection, word, freq),
  key = collection,
  value = freq
)
frequency <- arrange(frequency, Alice_Weidel, b_riexinger)

frequency

In [None]:
# Compare token frequencies between the two accounts on log-log axes
# labelled as percentages. The red line is geom_abline()'s default line
# (y = x in the transformed scale); tokens away from it are used more
# by one account than by the other.
ggplot(data = frequency, mapping = aes(x = Alice_Weidel, y = b_riexinger)) +
  scale_x_log10(labels = percent_format()) +
  scale_y_log10(labels = percent_format()) +
  geom_jitter(width = 0.25, height = 0.25, size = 2.5, alpha = 0.1) +
  geom_text(mapping = aes(label = word), vjust = 1.5, check_overlap = TRUE) +
  geom_abline(colour = "red")

## Sentiment-Analyse

In [None]:
# Load the SentiWS sentiment lexicon and preview its first five entries.
# read.SentiWS() is presumably defined in the sourced
# SentiWS_v1.8c/SentiWS.R helper script -- verify there.
SentiWS <- read.SentiWS()

SentiWS %>%
  head(n = 5)