-
Notifications
You must be signed in to change notification settings - Fork 1
/
hardwax_bot.R
139 lines (100 loc) · 3.72 KB
/
hardwax_bot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# LOAD -----------------------------------------------------------
# MRAN (mran.microsoft.com) has been retired, so packages are installed from
# the CRAN cloud mirror instead.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Install whatever is missing. requireNamespace() only checks availability;
# the library() calls below attach the packages and fail loudly if an
# installation did not succeed.
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
if (!requireNamespace("stringr", quietly = TRUE)) install.packages("stringr")
if (!requireNamespace("rtweet", quietly = TRUE)) install.packages("rtweet")
library(dplyr)
library(stringr)
library(rtweet)
# load data: n-gram count tables read straight from the project repository
word_counts <- read.csv("https://raw.github.com/ewenme/hardwax_bot/master/Data/words.csv", stringsAsFactors = FALSE)
opener_counts <- read.csv("https://raw.github.com/ewenme/hardwax_bot/master/Data/openers.csv", stringsAsFactors = FALSE)
bigram_counts <- read.csv("https://raw.github.com/ewenme/hardwax_bot/master/Data/bigrams.csv", stringsAsFactors = FALSE)
trigram_counts <- read.csv("https://raw.github.com/ewenme/hardwax_bot/master/Data/trigrams.csv", stringsAsFactors = FALSE)
# set twitter token
# NOTE(review): this deserialises an R object fetched over the network and the
# credential file sits next to the data in the repository - confirm that the
# token is meant to be readable there, or move it to a private location.
twitter_token <- readRDS(gzcon(url("https://raw.github.com/ewenme/hardwax_bot/master/twitter_token.rds")))
# NEXT WORD PREDICTION -------------------------------------------
# Upper-case the first character of every element of `x`, leaving the rest
# of each string untouched.
firstup <- function(x) {
  leading <- substr(x, start = 1, stop = 1)
  substr(x, start = 1, stop = 1) <- toupper(leading)
  x
}
# Pick the word that follows the pair (woord1, woord2).
#
# Candidates are tried in this order, each drawn at random weighted by the
# table's `n` column:
#   1. `word3` of the trigram_counts rows matching both words,
#   2. `word2` of the bigram_counts rows whose `word1` equals woord2,
#   3. any `word` from word_counts, so that a pair with no recorded
#      continuation restarts the chain instead of raising a sampling error.
#
# Args:
#   woord1, woord2: the two preceding words (single strings).
# Returns: one value taken from the chosen table's word column.
return_third_word <- function(woord1, woord2) {
  # draw one entry of `column`, weighted by the candidates' counts
  draw_weighted <- function(candidates, column) {
    pick <- sample.int(nrow(candidates), size = 1, prob = candidates[["n"]])
    candidates[[column]][pick]
  }

  # which() drops NA comparisons, so rows with missing words never match
  trigram_rows <- which(
    trigram_counts[["word1"]] == woord1 & trigram_counts[["word2"]] == woord2
  )
  if (length(trigram_rows) > 0) {
    return(draw_weighted(trigram_counts[trigram_rows, , drop = FALSE], "word3"))
  }

  bigram_rows <- which(bigram_counts[["word1"]] == woord2)
  if (length(bigram_rows) > 0) {
    return(draw_weighted(bigram_counts[bigram_rows, , drop = FALSE], "word2"))
  }

  draw_weighted(word_counts, "word")
}
# SENTENCE GENERATOR ------------------------------------------
# Build one sentence by extending two seed words with return_third_word()
# until it holds `sentencelength` words.
#
# Args:
#   word1, word2:   one-row data frames with a `word` column; `word1` also
#                   supplies the `comma_prob` that decides whether a comma
#                   follows the first word.
#   sentencelength: total number of words wanted, seeds included (at least 3).
#   debug:          when TRUE, print the loop counter for every added word.
#
# Returns: a single string with its first letter upper-cased; for 11 of the
# 20 possible `tip_n` draws one of the fixed taglines is appended.
# Raises: an error when `sentencelength` is below 3.
generate_sentence <- function(word1, word2, sentencelength, debug = FALSE) {
  # input validation
  if (sentencelength < 3) {
    stop("I need more to work with")
  }
  n_extra <- sentencelength - 2

  # A comma is due when the 0-100 draw is at or below the word's comma_prob.
  # Only the first probability is considered and a missing one means "no
  # comma", so a word that is absent from (or listed twice in) word_counts
  # can no longer break the `if` condition.
  comma_due <- function(draw, prob) {
    prob <- as.numeric(prob)[1]
    !is.na(prob) && draw <= prob
  }

  # preallocate: one slot per word plus a flag for "comma in front of it"
  words <- character(sentencelength)
  comma_before <- logical(sentencelength)

  # seed words; the first draw decides on a comma between them
  draw <- sample(0:100, 1)
  words[1] <- as.character(word1$word)[1]
  words[2] <- as.character(word2$word)[1]
  comma_before[2] <- comma_due(draw, word1$comma_prob)

  woord1 <- words[1]
  woord2 <- words[2]
  for (i in seq_len(n_extra)) {
    # the comma draw happens before the word is sampled
    draw <- sample(0:100, 1)
    if (isTRUE(as.logical(debug))) print(i)
    next_word <- return_third_word(woord1, woord2)[1]
    # first matching row of word_counts; NA when the word is not listed
    prob <- word_counts$comma_prob[match(next_word, word_counts$word)]
    words[i + 2] <- next_word
    comma_before[i + 2] <- comma_due(draw, prob)
    woord1 <- woord2
    woord2 <- next_word
  }

  # Join with ", " or " " directly, so no stray " ," or double spaces are
  # produced and no clean-up pass is needed afterwards.
  separators <- ifelse(comma_before, ", ", " ")
  separators[1] <- ""
  output <- paste0(separators, words, collapse = "")

  # add tip sometimes: draws 1-11 map onto a tagline, 12-20 add nothing
  tip_n <- sample(1:20, 1)
  taglines <- c(
    "- TIP!", "- TIP!",
    "(one per customer)", "(one per customer)",
    "- Killer!",
    "- Warmly Recommended!", "- Warmly Recommended!",
    "- Highly Recommended!", "- Highly Recommended!",
    "(w/ download code)", "(w/ download code)"
  )
  if (tip_n <= length(taglines)) {
    output <- paste(output, taglines[tip_n])
  }

  firstup(output)
}
# REVIEW GENERATOR -------------------------------------------------
# Produce one random review: sample an opener and a follow-up word (each
# weighted by its `n` count), pick a length between 5 and 12 words, and hand
# all three to generate_sentence(). The argument `x` is accepted but unused.
dumb_hardwax <- function(x) {
  opener <- opener_counts %>% sample_n(size = 1, weight = n)
  follow_up <- word_counts %>% sample_n(size = 1, weight = n)
  n_words <- sample(5:12, 1)
  generate_sentence(opener, follow_up, n_words)
}
# TWEET --------------------------------------------------------
# create tweet: one generated review string (dumb_hardwax() is called
# without its unused argument)
tweet_text <- dumb_hardwax()
# post tweet through rtweet, authenticating with the token loaded at the
# top of the script
post_tweet(status = tweet_text, token = twitter_token)