This repository has been archived by the owner on Apr 11, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 164
/
comm.R
executable file
·240 lines (208 loc) · 7.22 KB
/
comm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
## twitter API has multiple methods of handling paging issues, not to mention the search API
## has a completely different interface. Trying to manage all of these below using one unified
## approach to actually sending the data back & receiving response and then providing multiple
## mechanisms to page
tw_from_response = function(response) {
## Will provide some basic error checking, as well as suppress
## warnings that always seem to come out of fromJSON, even
## in good cases.
out <- try(suppressWarnings(fromJSON(content(response, as="text", encoding="UTF-8"))), silent=TRUE)
if (inherits(out, "try-error")) {
stop("Error: Malformed response from server, was not JSON.\n",
"The most likely cause of this error is Twitter returning a character which\n",
"can't be properly parsed by R. Generally the only remedy is to wait long\n",
"enough for the offending character to disappear from searches (e.g. if\n",
"using searchTwitter()).")
}
return(out)
}
doAPICall = function(cmd, params=NULL, method="GET", retryCount=5,
retryOnRateLimit=0, debug=FALSE, ...) {
if (debug) {
browser()
}
if (!is.numeric(retryOnRateLimit)) {
stop("retryOnRateLimit must be a number")
}
if (!is.numeric(retryCount)) {
stop("retryCount must be a number")
}
recall_func = function(retryCount, rateLimitCount) {
return(doAPICall(cmd, params=params, method=method, retryCount=retryCount,
retryOnRateLimit=rateLimitCount, ...))
}
url = getAPIStr(cmd)
if (method == "POST") {
out = try(POST(url, config(token=get_oauth_sig()), body=params), silent=TRUE)
} else {
if (is.null(params)) {
query = NULL
} else {
query = lapply(params, function(x) URLencode(as.character(x)))
}
out = GET(url, query=query, config(token=get_oauth_sig()))
}
httr_status = out$status
http_message = http_status(out)$message
if (httr_status %in% c(500, 502)) {
print(http_message)
print(paste("This error is likely transient, retrying up to", retryCount, "more times ..."))
## These are typically fail whales or similar such things
Sys.sleep(1)
return(recall_func(retryCount - 1, rateLimitCount=retryOnRateLimit))
} else if (httr_status == 429) {
if (retryOnRateLimit > 0) {
## We're rate limited. Wait a while and try again
newRateLimit = retryOnRateLimit - 1
print(paste("Rate limited .... blocking for a minute and retrying up to", newRateLimit, "times ..."))
Sys.sleep(60)
return(recall_func(retryCount, newRateLimit))
} else {
## FIXME: very experimental - the idea is that if we're rate limited,
## just give a warning and return. This should result in rate limited
## operations returning the partial result
warning("Rate limit encountered & retry limit reached - returning partial results")
return(NULL)
}
} else if (httr_status == 401) {
stop("OAuth authentication error:\nThis most likely means that you have incorrectly called setup_twitter_oauth()'")
} else {
## Generic catch-all for any other errors
stop_for_status(out)
}
json = tw_from_response(out, ...)
if (length(json[["errors"]]) > 0) {
stop(json[["errors"]][[1]][["message"]])
}
out = json
}
setRefClass('twAPIInterface',
fields = list(
maxResults = 'integer'
),
methods = list(
initialize=function(...) {
maxResults <<- 100L
callSuper(...)
.self
},
tw_from_response = tw_from_response,
doAPICall = doAPICall
)
)
tint <- getRefClass('twAPIInterface')
tint$accessors(names(tint$fields()))
twInterfaceObj <- tint$new()
doPagedAPICall = function(cmd, num, params=NULL, method='GET', ...) {
if (num <= 0)
stop('num must be positive')
else
num <- as.integer(num)
maxResults <- twInterfaceObj$getMaxResults()
page <- 1
total <- num
count <- ifelse(num < maxResults, num, maxResults)
jsonList <- list()
params[['count']] <- count
while (total > 0) {
params[['page']] <- page
results = twInterfaceObj$doAPICall(cmd, params, method, ...)
if (is.null(results)) {
return(jsonList)
}
jsonList <- c(jsonList, results)
total <- total - count
page <- page + 1
}
jLen <- length(jsonList)
if ((jLen > 0) && (jLen > num))
jsonList <- jsonList[1:num]
jsonList
}
doCursorAPICall = function(cmd, type, num=NULL, params=NULL, method='GET', ...) {
cursor <- -1
if (!is.null(num)) {
if (num <= 0)
stop("num must be positive")
else
num <- as.integer(num)
}
vals <- character()
while(cursor != 0) {
params[['cursor']] <- cursor
curResults <- twInterfaceObj$doAPICall(cmd, params, method, ...)
if (is.null(curResults)) {
return(vals)
}
vals <- c(vals, curResults[[type]])
if ((!is.null(num)) && (length(vals) >= num))
break
cursor <- curResults[['next_cursor_str']]
}
if ((!is.null(num)) && (length(vals) > num))
vals <- vals[1:num]
vals
}
doRppAPICall = function(cmd, num, params, ...) {
if (! 'q' %in% names(params))
stop("parameter 'q' must be supplied")
maxResults <- twInterfaceObj$getMaxResults()
params[['count']] <- ifelse(num < maxResults, num, maxResults)
curDiff <- num
jsonList <- list()
ids = list()
while (curDiff > 0) {
fromJSON <- twInterfaceObj$doAPICall(cmd, params, 'GET', ...)
if (is.null(fromJSON)) {
return(jsonList)
}
newList <- fromJSON$statuses
curIds = sapply(newList, function(x) x[["id"]])
dups = which(ids %in% ids)
if (length(dups) > 0) {
curIds = curIds[-dups]
newList = newList[-dups]
}
if (length(curIds) == 0) {
break
}
jsonList <- c(jsonList, newList)
curDiff <- num - length(jsonList)
if ((curDiff > 0)) { #&& (length(newList) == params[["count"]])) {
params[["max_id"]] = as.character(as.integer64(min(curIds)) - 1)
} else {
break
}
}
if (length(jsonList) > num) {
jsonList = jsonList[seq_len(num)]
}
if (length(jsonList) < num) {
warning(num, " tweets were requested but the API can only return ", length(jsonList))
}
return(jsonList)
}
twitterDateToPOSIX <- function(dateStr) {
## In typical twitter fashion, there are multiple ways that they
## spit dates back at us. First, let's take a look at unix
## epoch time, and then try a few data string formats
dateInt <- suppressWarnings(as.numeric(dateStr))
## Locale must be set to something american-y in order to properly
## parse the Twitter dates. Get the current LC_TIME, reset it on
## exit and then change the locale
curLocale <- Sys.getlocale("LC_TIME")
on.exit(Sys.setlocale("LC_TIME", curLocale), add=TRUE)
Sys.setlocale("LC_TIME", "C")
if (!is.na(dateInt)) {
posDate <- as.POSIXct(dateInt, tz='UTC', origin='1970-01-01')
} else {
posDate <- as.POSIXct(dateStr, tz='UTC',
format="%a %b %d %H:%M:%S +0000 %Y")
## try again if necessary
if (is.na(posDate))
posDate <- as.POSIXct(dateStr, tz='UTC',
format="%a, %d %b %Y %H:%M:%S +0000")
}
## might still be NA, but we tried
return(posDate)
}