# --- scraping artifact: GitHub web-page chrome, commented out so the file parses as R ---
# Skip to content
# Permalink
# Branch: master
# Find file Copy path
# Find file Copy path
# Fetching contributors…
# Cannot retrieve contributors at this time
# 95 lines (71 sloc) 3.09 KB
# Twitter sentiment analysis with lexicons
# 2018-03-14

# Install required packages (run once; comment out after first use).
# NOTE: original had install.package('rtweet') — singular — which is not a
# function and errors; the correct name is install.packages().
install.packages('rtweet')
install.packages('tidyverse')
install.packages('maps')
install.packages('tidytext')
install.packages('httpuv')
# tokenizers is installed from GitHub; requires the devtools package
devtools::install_github("ropensci/tokenizers")

# Load libraries (every session)
library(rtweet)     # Twitter API access
library(tidyverse)  # dplyr/ggplot2/stringr/forcats, %>% pipe
library(maps)
library(tidytext)   # unnest_tokens(), get_sentiments()
library(httpuv)     # local webserver used by rtweet's browser-based auth
#first establish authentication. Replace the dummy values below with your own
#if you forgot to set up a Twitter app or don't know how to get these values see the link where you downloaded this script
#NOTE(review): never commit real API credentials to a shared script — load them
#from environment variables (Sys.getenv) or a local, git-ignored file instead.
#create_token() registers the OAuth token with rtweet for the rest of the session.
create_token(
app = "my_twitter_research_app",
consumer_key = "XYznzPFOFZR2a39FwWKN1Jp41",
consumer_secret = "CtkGEWmSevZqJuKl6HHrBxbCybxI1xGLqrD5ynPd9jG0SoHZbD",
access_token = "9551451262-wK2EmA942kxZYIwa5LMKZoQA4Xc2uyIiEwu2YXL",
access_secret = "9vpiSGKg1fIPQtxc5d5ESiFlZQpfbknEN1f1m2xe5byw7")
##basic sentiment analysis********************
#get Trump's most recent 500 tweets (original comment said 100 but n = 500)
trump <- get_timelines("realdonaldtrump", n = 500)
#tokenize: unnest_tokens() gives one row per word from the text column
trump_tidy <- trump %>% unnest_tokens(word, text)
#perform sentiment analysis (valence) using AFINN dictionary
#note there are other lexicons you can call see ?get_sentiments for details
#by = "word" makes the join key explicit (same result, no join message)
#NOTE(review): newer tidytext releases name the AFINN score column 'value'
#rather than 'score' — if the filters below error, substitute 'value'. Verify
#against your installed tidytext version.
t_sent <- trump_tidy %>% left_join(get_sentiments('afinn'), by = "word")
#plot the most frequent negative words
#AFINN scores run -5..-1 (negative) and 1..5 (positive), so score < 1 keeps
#only negative words; filter() also drops the NA scores left by the join
t_sent %>% filter(score < 1) %>% count(word) %>%
  top_n(15) %>% mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +       #horizontal bars: words on the vertical axis
  theme_bw() +
  theme(text = element_text(size = 15))
#now let's compare Trump to someone else: pull AOC's 500 most recent tweets
ocasio <- get_timelines("AOC", n = 500)
#tokenize and clean text
ocasio_tidy <- ocasio %>% unnest_tokens(word, text)
#perform sentiment analysis (valence) using AFINN dictionary
#by = "word" makes the join key explicit (same result, no join message)
o_sent <- ocasio_tidy %>% left_join(get_sentiments('afinn'), by = "word")
#compare Trump and Ocasio most frequent negative words
#first pull each person's top 10 negative words (score < 1 keeps only AFINN
#negatives; ties can make top_n() return more than 10 rows)
#a person column tags each table so the facets can be split after combining
t_freqemo <- t_sent %>% filter(score < 1) %>% count(word) %>%
  top_n(10) %>% mutate(word = reorder(word, n)) %>%
  mutate(person = "Trump")
o_freqemo <- o_sent %>% filter(score < 1) %>% count(word) %>%
  top_n(10) %>% mutate(word = reorder(word, n)) %>%
  mutate(person = "Ocasio")
#combine the two data sets for a single faceted plot
t_o <- rbind(t_freqemo, o_freqemo)
#make a nice plot: one facet per person, horizontal bars
t_o %>% ggplot(aes(word, n, fill = person)) +
  scale_fill_manual(values = c("dodgerblue2", "firebrick2")) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ person, scales = "free") +
  coord_flip() +
  theme_bw() +
  #labs() refers to the aesthetics, not the drawn axes: after coord_flip()
  #the x aesthetic (word) is the vertical axis, so x gets "Negative Word"
  #and y (the count n) gets "Frequency" — the original had these swapped
  labs(x = "Negative Word", y = "Frequency") +
  theme(axis.title = element_text(face = "bold"))
#if one of your panes is not plotting in order of frequency run this, then
#re-run the plot code above: the leading space makes Trump's words distinct
#factor levels, so fct_reorder() can sort each facet independently
t_o <- t_o %>% mutate(word = factor(word)) %>%
  mutate(word = ifelse(person == 'Trump', str_c(' ', word), str_c('', word))) %>%
  mutate(word = fct_reorder(word, n))
#save a hi-res version of the MOST RECENTLY DISPLAYED plot
#NOTE(review): ggsave() saves the last plot drawn — if you applied the
#reordering fix above, re-run the plot code before saving
ggsave("Trump_Ocasio_NegLanguage.jpg")
# You can’t perform that action at this time. (GitHub page footer — commented out)